Files
nezha_domains/service/singleton/singleton.go
T
奶爸 e61772e858 feat(v2.0.0): tsdb (#1162)
* feat: tsdb

* fix(ci): remove --parseGoList=false from swag init to fix dependency resolution

* fix(ci): fix swag init directory and temporary remove s390x support due to cgo issues

* fix(ci): fix swag init output directory to cmd/dashboard/docs

* fix(ci): set GOTOOLCHAIN=auto for gosec

* feat: add system storage maintenance for SQLite and TSDB

* shit

* feat: add s390x support and improve service monitoring

* ci: upgrade goreleaser-cross image to v1.25

* ci: add libzstd-dev:s390x for cross-compilation

* ci: build libzstd for s390x from source

* ci: add libzstd_linux_s390x.go for gozstd linking

* ci: use vendor mode for s390x gozstd build

* ci: clone zstd source for s390x build

* refactor(tsdb): rename MaxDiskUsageGB to MinFreeDiskSpaceGB and optimize queries

- Rename config to accurately reflect VictoriaMetrics behavior: minimum free disk space threshold
- Add QueryServiceHistoryByServerID for batch query optimization
- Fix hasStatus to avoid false status counting when only delay data exists
- Fix service aggregation boundary: use successCount*2 >= count
- Fix serviceID parsing with strconv.ParseUint error handling
- Add TagFiltersCacheSize for better query performance

* feat(api): add server metrics endpoint and simplify service history response

- Add /server/:id/metrics API for querying TSDB server metrics
- Simplify getServiceHistory by removing redundant data conversion
- Change AvgDelay type from float32 to float64
- Remove generated swagger docs (to be regenerated)
- Update TSDB query, writer and tests

* chore: 临时禁用不支持前端

* ci: cache zstd build for s390x to speed up CI

* fix(tsdb): fix race conditions, data correctness and optimize performance

- Fix TOCTOU race between IsClosed() and write/query by holding RLock
- Fix delay=0 excluded from stats by using hasDelay flag instead of value > 0
- Fix fmt.Sscanf -> strconv.ParseUint for server_id parsing with error logging
- Fix buffer unbounded growth by flushing inside lock when over maxSize
- Split makeMetricRow into makeServerMetricRow/makeServiceMetricRow
- Extract InitGlobalSettings() from Open() for VictoriaMetrics globals
- Remove redundant instance/GetInstance/SetInstance singleton
- Add error logging for silently skipped block decode errors
- Optimize WriteBatch* to build all rows in single write call
- Optimize downsample to use linear scan instead of map for sorted data
- Optimize query slice reuse across block iterations

* 服务添加DisplayIndex (#1166)

* 服务添加DisplayIndex

* 根据ai建议修改

---------

Co-authored-by: huYang <306061454@qq.com>

* fix(tsdb): restore SQLite fallback and monthly status reload on restart

- Restore ServiceHistory model and SQLite write fallback when TSDB is disabled
- Reload monthlyStatus (30-day) and serviceStatusToday from TSDB/SQLite on startup
- Add SQLite fallback query for /service/:id/history and /server/:id/service
- Remove breaking GET /service/:id endpoint, keep /service/:id/history only
- Add QueryServiceDailyStats to TSDB for per-day aggregation
- Add tests for monthly status and today stats loading from both TSDB and SQLite
- Migrate ServiceHistory table only when TSDB is disabled

* ci: exclude false-positive gosec rules G117, G703, G704

* feat(api): expose tsdb_enabled in setting response

* ci: restore G115 exclusion accidentally dropped in previous commit

* fix: update version numbers for OfficialAdmin and Official templates

* chore: upgrade frontend

* chore: upgrade frontend

---------

Co-authored-by: 胡说丷刂 <34758853+laosan-xx@users.noreply.github.com>
Co-authored-by: huYang <306061454@qq.com>
2026-02-15 13:13:33 +08:00

262 lines
6.7 KiB
Go

package singleton
import (
_ "embed"
"iter"
"log"
"maps"
"slices"
"sync"
"time"
"github.com/gin-gonic/gin"
"github.com/patrickmn/go-cache"
"gorm.io/driver/sqlite"
"gorm.io/gorm"
"sigs.k8s.io/yaml"
"github.com/nezhahq/nezha/model"
"github.com/nezhahq/nezha/pkg/utils"
)
// Version is the dashboard build version string; "debug" is the default for
// local builds (presumably overridden at release time via -ldflags — TODO confirm).
var Version = "debug"

var (
	Cache             *cache.Cache             // process-wide in-memory cache (created in InitTimezoneAndCache)
	DB                *gorm.DB                 // SQLite database handle (opened in InitDBFromPath)
	Loc               *time.Location           // dashboard timezone loaded from Conf.Location
	FrontendTemplates []model.FrontendTemplate // built-in frontend template list (loaded in InitFrontendTemplates)

	DashboardBootTime = uint64(time.Now().Unix()) // Unix timestamp captured at process start

	// Shared singleton sub-services, wired up by LoadSingleton.
	ServerShared          *ServerClass
	ServiceSentinelShared *ServiceSentinel
	DDNSShared            *DDNSClass
	NotificationShared    *NotificationClass
	NATShared             *NATClass
	CronShared            *CronClass
)

// frontendTemplatesYAML holds the embedded contents of frontend-templates.yaml.
//
//go:embed frontend-templates.yaml
var frontendTemplatesYAML []byte
// InitTimezoneAndCache loads the timezone configured in Conf.Location into the
// global Loc and creates the global in-memory Cache (5-minute default TTL,
// 10-minute cleanup interval). It returns an error if the location is invalid.
func InitTimezoneAndCache() error {
	var err error
	if Loc, err = time.LoadLocation(Conf.Location); err != nil {
		return err
	}
	Cache = cache.New(5*time.Minute, 10*time.Minute)
	return nil
}
// LoadSingleton initializes and wires up all singleton sub-services.
// The initialization order below is deliberate; callers receive any error
// from the final ServiceSentinel construction.
func LoadSingleton(bus chan<- *model.Service) (err error) {
	initI18n() // load the localization service
	initUser() // load the user ID binding table
	NATShared = NewNATClass()
	DDNSShared = NewDDNSClass()
	NotificationShared = NewNotificationClass()
	ServerShared = NewServerClass()
	CronShared = NewCronClass()
	// Initialize ServiceSentinel last — presumably it depends on the
	// services constructed above (TODO confirm against NewServiceSentinel).
	ServiceSentinelShared, err = NewServiceSentinel(bus)
	return
}
// InitFrontendTemplates populates FrontendTemplates from the embedded
// frontend-templates.yaml file, returning any unmarshalling error.
func InitFrontendTemplates() error {
	return yaml.Unmarshal(frontendTemplatesYAML, &FrontendTemplates)
}
// InitDBFromPath opens (creating if necessary) the SQLite database at the
// given file path, enables query debugging when Conf.Debug is set, and
// migrates all required table schemas. It returns any open or migration error.
func InitDBFromPath(path string) error {
	var err error
	DB, err = gorm.Open(sqlite.Open(path), &gorm.Config{
		CreateBatchSize: 200,
	})
	if err != nil {
		return err
	}
	if Conf.Debug {
		DB = DB.Debug()
	}
	// model.NotificationGroupNotification was previously listed twice here;
	// each model only needs to appear once for AutoMigrate.
	return DB.AutoMigrate(model.Server{}, model.User{}, model.ServerGroup{}, model.NotificationGroup{},
		model.Notification{}, model.AlertRule{}, model.Service{}, model.NotificationGroupNotification{},
		model.Cron{}, model.Transfer{}, model.ServerGroupServer{},
		model.NAT{}, model.DDNSProfile{}, model.WAF{}, model.Oauth2Bind{})
}
// RecordTransferHourlyUsage records network-transfer deltas for the given
// servers, or for every server registered in ServerShared when none are
// passed. Rows with a zero in/out delta are skipped; accepted rows are
// timestamped at the start of the current hour and batch-inserted.
func RecordTransferHourlyUsage(servers ...*model.Server) {
	now := time.Now()
	// Truncate to the top of the current hour so all rows written within the
	// same hour share one bucket timestamp.
	nowTrimSeconds := time.Date(now.Year(), now.Month(), now.Day(), now.Hour(), 0, 0, 0, now.Location())
	var txs []model.Transfer
	var slist iter.Seq[*model.Server]
	if len(servers) > 0 {
		slist = slices.Values(servers)
	} else {
		// No explicit list: iterate every server known to ServerShared.
		slist = utils.Seq2To1(ServerShared.Range)
	}
	for server := range slist {
		tx := model.Transfer{
			ServerID: server.ID,
			// SubUintChecked presumably clamps underflow when a counter
			// reset makes the snapshot exceed the current value — TODO
			// confirm against pkg/utils.
			In:  utils.SubUintChecked(server.State.NetInTransfer, server.PrevTransferInSnapshot),
			Out: utils.SubUintChecked(server.State.NetOutTransfer, server.PrevTransferOutSnapshot),
		}
		if tx.In == 0 && tx.Out == 0 {
			continue
		}
		// Advance the snapshots so the next tick only records new traffic.
		server.PrevTransferInSnapshot = server.State.NetInTransfer
		server.PrevTransferOutSnapshot = server.State.NetOutTransfer
		tx.CreatedAt = nowTrimSeconds
		txs = append(txs, tx)
	}
	if len(txs) == 0 {
		return
	}
	// NOTE(review): the snapshots above are advanced before this insert, so a
	// failed write silently drops that interval's delta (the error is only
	// logged) — confirm this is acceptable.
	log.Printf("NEZHA>> Saved traffic metrics to database. Affected %d row(s), Error: %v", len(txs), DB.Create(txs).Error)
}
// CleanMonitorHistory prunes traffic (Transfer) records no longer needed by
// any alert rule. TSDB metrics are not touched here — the TSDB applies its
// own retention policy.
func CleanMonitorHistory() {
	// Remove traffic records belonging to servers that have been deleted.
	DB.Unscoped().Delete(&model.Transfer{}, "server_id NOT IN (SELECT `id` FROM servers)")
	// Work out how far back traffic records must be kept.
	// allServerKeep: earliest timestamp any all-server rule still needs.
	// specialServerKeep: per-server earliest timestamps for scoped rules.
	var allServerKeep time.Time
	specialServerKeep := make(map[uint64]time.Time)
	var specialServerIDs []uint64
	var alerts []model.AlertRule
	DB.Find(&alerts)
	for _, alert := range alerts {
		for _, rule := range alert.Rules {
			// Only transfer-duration rules constrain traffic retention.
			if !rule.IsTransferDurationRule() {
				continue
			}
			dataCouldRemoveBefore := rule.GetTransferDurationStart().UTC()
			// Determine which servers the rule covers.
			if rule.Cover == model.RuleCoverAll {
				// Track the earliest cutoff required by any global rule.
				if allServerKeep.IsZero() || allServerKeep.After(dataCouldRemoveBefore) {
					allServerKeep = dataCouldRemoveBefore
				}
			} else {
				// Track the earliest cutoff per specifically-covered server.
				// NOTE(review): rule.Ignore keys are treated as the covered
				// server IDs here — confirm that semantics for non-RuleCoverAll
				// rules.
				for id := range rule.Ignore {
					if specialServerKeep[id].IsZero() || specialServerKeep[id].After(dataCouldRemoveBefore) {
						specialServerKeep[id] = dataCouldRemoveBefore
						specialServerIDs = append(specialServerIDs, id)
					}
				}
			}
		}
	}
	// Delete per-server records older than each server's own cutoff.
	for id, couldRemove := range specialServerKeep {
		DB.Unscoped().Delete(&model.Transfer{}, "server_id = ? AND datetime(`created_at`) < datetime(?)", id, couldRemove)
	}
	// For servers not covered by a scoped rule, apply the global cutoff (or
	// delete everything when no global rule exists).
	// NOTE(review): when specialServerIDs is empty, GORM renders the IN list
	// as NULL, making `NOT IN` match no rows — verify this branch behaves as
	// intended in that case.
	if allServerKeep.IsZero() {
		DB.Unscoped().Delete(&model.Transfer{}, "server_id NOT IN (?)", specialServerIDs)
	} else {
		DB.Unscoped().Delete(&model.Transfer{}, "server_id NOT IN (?) AND datetime(`created_at`) < datetime(?)", specialServerIDs, allServerKeep)
	}
}
// PerformMaintenance runs periodic system maintenance: a SQLite VACUUM when
// the database handle is available, followed by the TSDB's own maintenance
// routine when the TSDB is enabled. Failures are logged, not returned.
func PerformMaintenance() {
	log.Println("NEZHA>> Starting system maintenance...")
	// Step 1: compact the SQLite database file.
	if DB != nil {
		log.Println("NEZHA>> SQLite: Starting VACUUM...")
		err := DB.Exec("VACUUM").Error
		switch {
		case err != nil:
			log.Printf("NEZHA>> SQLite: VACUUM failed: %v", err)
		default:
			log.Println("NEZHA>> SQLite: VACUUM completed")
		}
	}
	// Step 2: delegate time-series storage upkeep to the TSDB.
	if TSDBEnabled() {
		TSDBShared.Maintenance()
	}
	log.Println("NEZHA>> System maintenance completed")
}
// IPDesensitize returns the IP masked for display, unless the dashboard is
// configured to show plain IPs in notifications, in which case the original
// IP is returned unchanged.
func IPDesensitize(ip string) string {
	if !Conf.EnablePlainIPInNotification {
		return utils.IPDesensitize(ip)
	}
	return ip
}
// class is a generic, concurrency-safe container pairing an ID-keyed map with
// a separately-maintained sorted slice of the same elements. Each collection
// is guarded by its own RWMutex.
type class[K comparable, V model.CommonInterface] struct {
	list   map[K]V      // elements keyed by ID; guarded by listMu
	listMu sync.RWMutex // protects list

	sortedList   []V          // elements in sorted order; guarded by sortedListMu
	sortedListMu sync.RWMutex // protects sortedList
}
// Get returns the element stored under id; ok reports whether it exists.
func (c *class[K, V]) Get(id K) (s V, ok bool) {
	c.listMu.RLock()
	s, ok = c.list[id]
	c.listMu.RUnlock()
	return
}
// GetList returns a shallow copy of the ID-keyed element map, so callers can
// read it without holding the container's lock.
func (c *class[K, V]) GetList() map[K]V {
	c.listMu.RLock()
	defer c.listMu.RUnlock()
	snapshot := maps.Clone(c.list)
	return snapshot
}
// GetSortedList returns a shallow copy of the sorted element slice, so callers
// can read it without holding the container's lock.
func (c *class[K, V]) GetSortedList() []V {
	c.sortedListMu.RLock()
	defer c.sortedListMu.RUnlock()
	snapshot := slices.Clone(c.sortedList)
	return snapshot
}
// Range calls fn for each element under a read lock, stopping early when fn
// returns false. Iteration order follows Go's randomized map order.
func (c *class[K, V]) Range(fn func(k K, v V) bool) {
	c.listMu.RLock()
	defer c.listMu.RUnlock()
	for key, val := range c.list {
		keepGoing := fn(key, val)
		if !keepGoing {
			return
		}
	}
}
// CheckPermission reports whether the request context has permission on every
// element named in idList. IDs absent from the container are skipped; the
// first known element failing its permission check yields false.
func (c *class[K, V]) CheckPermission(ctx *gin.Context, idList iter.Seq[K]) bool {
	c.listMu.RLock()
	defer c.listMu.RUnlock()
	for id := range idList {
		entry, found := c.list[id]
		if !found {
			continue
		}
		if !entry.HasPermission(ctx) {
			return false
		}
	}
	return true
}