feat(v2.0.0): tsdb (#1162)

* feat: tsdb

* fix(ci): remove --parseGoList=false from swag init to fix dependency resolution

* fix(ci): fix swag init directory and temporarily remove s390x support due to cgo issues

* fix(ci): fix swag init output directory to cmd/dashboard/docs

* fix(ci): set GOTOOLCHAIN=auto for gosec

* feat: add system storage maintenance for SQLite and TSDB

* shit

* feat: add s390x support and improve service monitoring

* ci: upgrade goreleaser-cross image to v1.25

* ci: add libzstd-dev:s390x for cross-compilation

* ci: build libzstd for s390x from source

* ci: add libzstd_linux_s390x.go for gozstd linking

* ci: use vendor mode for s390x gozstd build

* ci: clone zstd source for s390x build

* refactor(tsdb): rename MaxDiskUsageGB to MinFreeDiskSpaceGB and optimize queries

- Rename config to accurately reflect VictoriaMetrics behavior: minimum free disk space threshold
- Add QueryServiceHistoryByServerID for batch query optimization
- Fix hasStatus to avoid false status counting when only delay data exists
- Fix service aggregation boundary: use successCount*2 >= count
- Fix serviceID parsing with strconv.ParseUint error handling
- Add TagFiltersCacheSize for better query performance

* feat(api): add server metrics endpoint and simplify service history response

- Add /server/:id/metrics API for querying TSDB server metrics
- Simplify getServiceHistory by removing redundant data conversion
- Change AvgDelay type from float32 to float64
- Remove generated swagger docs (to be regenerated)
- Update TSDB query, writer and tests

* chore: 临时禁用不支持前端

* ci: cache zstd build for s390x to speed up CI

* fix(tsdb): fix race conditions, data correctness and optimize performance

- Fix TOCTOU race between IsClosed() and write/query by holding RLock
- Fix delay=0 excluded from stats by using hasDelay flag instead of value > 0
- Fix fmt.Sscanf -> strconv.ParseUint for server_id parsing with error logging
- Fix buffer unbounded growth by flushing inside lock when over maxSize
- Split makeMetricRow into makeServerMetricRow/makeServiceMetricRow
- Extract InitGlobalSettings() from Open() for VictoriaMetrics globals
- Remove redundant instance/GetInstance/SetInstance singleton
- Add error logging for silently skipped block decode errors
- Optimize WriteBatch* to build all rows in single write call
- Optimize downsample to use linear scan instead of map for sorted data
- Optimize query slice reuse across block iterations

* 服务添加DisplayIndex (#1166)

* 服务添加DisplayIndex

* 根据ai建议修改

---------

Co-authored-by: huYang <306061454@qq.com>

* fix(tsdb): restore SQLite fallback and monthly status reload on restart

- Restore ServiceHistory model and SQLite write fallback when TSDB is disabled
- Reload monthlyStatus (30-day) and serviceStatusToday from TSDB/SQLite on startup
- Add SQLite fallback query for /service/:id/history and /server/:id/service
- Remove breaking GET /service/:id endpoint, keep /service/:id/history only
- Add QueryServiceDailyStats to TSDB for per-day aggregation
- Add tests for monthly status and today stats loading from both TSDB and SQLite
- Migrate ServiceHistory table only when TSDB is disabled

* ci: exclude false-positive gosec rules G117, G703, G704

* feat(api): expose tsdb_enabled in setting response

* ci: restore G115 exclusion accidentally dropped in previous commit

* fix: update version numbers for OfficialAdmin and Official templates

* chore: upgrade frontend

* chore: upgrade frontend

---------

Co-authored-by: 胡说丷刂 <34758853+laosan-xx@users.noreply.github.com>
Co-authored-by: huYang <306061454@qq.com>
This commit is contained in:
奶爸
2026-02-15 13:13:33 +08:00
committed by GitHub
parent 4c4758207d
commit e61772e858
28 changed files with 3054 additions and 221 deletions
+336
View File
@@ -0,0 +1,336 @@
package singleton
import (
"os"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"gorm.io/driver/sqlite"
"gorm.io/gorm"
"github.com/nezhahq/nezha/model"
"github.com/nezhahq/nezha/pkg/tsdb"
)
// newTestSentinel returns a ServiceSentinel whose serviceStatusToday and
// monthlyStatus maps hold a fresh, zero-valued entry for every ID in
// serviceIDs. Each monthly entry carries a model.Service with that ID and
// zeroed 30-slot Delay/Up/Down arrays.
func newTestSentinel(serviceIDs []uint64) *ServiceSentinel {
	todayStats := make(map[uint64]*_TodayStatsOfService)
	monthly := make(map[uint64]*serviceResponseItem)

	for i := range serviceIDs {
		sid := serviceIDs[i]

		todayStats[sid] = new(_TodayStatsOfService)
		monthly[sid] = &serviceResponseItem{
			service: &model.Service{Common: model.Common{ID: sid}},
			ServiceResponseItem: model.ServiceResponseItem{
				Delay: new([30]float64),
				Up:    new([30]uint64),
				Down:  new([30]uint64),
			},
		}
	}

	return &ServiceSentinel{
		serviceStatusToday: todayStats,
		monthlyStatus:      monthly,
	}
}
// setupTestDB opens an in-memory SQLite database, migrates the
// model.ServiceHistory table and installs the handle as the package-level DB.
//
// The returned function resets DB to nil and closes the underlying
// connection pool; callers are expected to defer it.
//
// The global is only assigned once the database is fully prepared, so a
// failed setup (which aborts the test via require) never leaves DB pointing
// at a half-initialised handle.
func setupTestDB(t *testing.T) func() {
	t.Helper()

	db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{})
	require.NoError(t, err)

	// Grab the generic database handle up front so the cleanup can release it.
	sqlDB, err := db.DB()
	require.NoError(t, err)

	if err := db.AutoMigrate(&model.ServiceHistory{}); err != nil {
		// require.NoError below aborts the test before a cleanup func is
		// handed out, so release the connection here.
		_ = sqlDB.Close()
		require.NoError(t, err)
	}

	DB = db
	return func() {
		DB = nil
		// Best effort: the database is in-memory and about to be discarded.
		_ = sqlDB.Close()
	}
}
// setupTestTSDB opens a TSDB instance backed by a fresh temporary directory
// and installs it as the package-level TSDBShared.
//
// It returns the opened instance together with a cleanup function that closes
// it, resets TSDBShared to nil and removes the temporary directory; callers
// are expected to defer the cleanup.
func setupTestTSDB(t *testing.T) (*tsdb.TSDB, func()) {
	t.Helper()

	tempDir, err := os.MkdirTemp("", "tsdb_sentinel_test")
	require.NoError(t, err)

	config := &tsdb.Config{
		DataPath:           filepath.Join(tempDir, "tsdb"),
		RetentionDays:      30,
		MinFreeDiskSpaceGB: 1,
		DedupInterval:      time.Second,
	}

	db, err := tsdb.Open(config)
	if err != nil {
		// require.NoError below aborts the test before the cleanup func is
		// returned, so remove the directory here to avoid leaking it.
		_ = os.RemoveAll(tempDir)
	}
	require.NoError(t, err)

	TSDBShared = db
	return db, func() {
		db.Close()
		TSDBShared = nil
		// Best effort: a leftover temp directory must not fail the test.
		_ = os.RemoveAll(tempDir)
	}
}
// TestLoadMonthlyStatusFromDB inserts three ServiceHistory rows dated before
// today's midnight and checks that loadMonthlyStatusFromDB folds them into the
// expected per-day slots and totals of monthlyStatus.
func TestLoadMonthlyStatusFromDB(t *testing.T) {
	cleanup := setupTestDB(t)
	defer cleanup()

	year, month, day := time.Now().Date()
	today := time.Date(year, month, day, 0, 0, 0, 0, time.UTC)

	serviceID := uint64(1)
	ss := newTestSentinel([]uint64{serviceID})

	fixtures := []model.ServiceHistory{
		{ServiceID: serviceID, ServerID: 0, AvgDelay: 10.0, Up: 5, Down: 1, CreatedAt: today.Add(-25 * time.Hour)},
		{ServiceID: serviceID, ServerID: 0, AvgDelay: 20.0, Up: 3, Down: 2, CreatedAt: today.Add(-25 * time.Hour)},
		{ServiceID: serviceID, ServerID: 0, AvgDelay: 30.0, Up: 10, Down: 0, CreatedAt: today.Add(-49 * time.Hour)},
	}
	for i := range fixtures {
		// A silently failed insert would only show up as a confusing
		// assertion mismatch further down, so stop right here instead.
		require.NoError(t, DB.Create(&fixtures[i]).Error)
	}

	ss.loadMonthlyStatusFromDB(today)

	ms := ss.monthlyStatus[serviceID]

	// The two rows created 25h before midnight are expected in slot 27:
	// delays 10 and 20 average to 15, Up is 5+3, Down is 1+2.
	assert.InDelta(t, 15.0, ms.Delay[27], 0.01)
	assert.Equal(t, uint64(8), ms.Up[27])
	assert.Equal(t, uint64(3), ms.Down[27])

	// The row created 49h before midnight is expected in slot 26.
	assert.InDelta(t, 30.0, ms.Delay[26], 0.01)
	assert.Equal(t, uint64(10), ms.Up[26])
	assert.Equal(t, uint64(0), ms.Down[26])

	// Totals span all three rows.
	assert.Equal(t, uint64(18), ms.TotalUp)
	assert.Equal(t, uint64(3), ms.TotalDown)

	// Today's slot (index 29) should be untouched.
	assert.Equal(t, float64(0), ms.Delay[29])
	assert.Equal(t, uint64(0), ms.Up[29])
}
// TestLoadMonthlyStatusFromDB_IgnoresToday checks that a ServiceHistory row
// dated after today's midnight does not contribute to the monthly totals.
func TestLoadMonthlyStatusFromDB_IgnoresToday(t *testing.T) {
	cleanup := setupTestDB(t)
	defer cleanup()

	year, month, day := time.Now().Date()
	today := time.Date(year, month, day, 0, 0, 0, 0, time.UTC)

	serviceID := uint64(1)
	ss := newTestSentinel([]uint64{serviceID})

	// The insert must be verified: the assertions below expect zeros, so a
	// failed insert would otherwise let the test pass without testing anything.
	require.NoError(t, DB.Create(&model.ServiceHistory{
		ServiceID: serviceID,
		ServerID:  0,
		AvgDelay:  50.0,
		Up:        100,
		Down:      5,
		CreatedAt: today.Add(2 * time.Hour),
	}).Error)

	ss.loadMonthlyStatusFromDB(today)

	ms := ss.monthlyStatus[serviceID]
	assert.Equal(t, uint64(0), ms.TotalUp)
	assert.Equal(t, uint64(0), ms.TotalDown)
}
// TestLoadMonthlyStatusFromDB_UnknownServiceIgnored checks that a history row
// for a service ID the sentinel does not track (999) leaves the tracked
// service's totals untouched.
func TestLoadMonthlyStatusFromDB_UnknownServiceIgnored(t *testing.T) {
	cleanup := setupTestDB(t)
	defer cleanup()

	year, month, day := time.Now().Date()
	today := time.Date(year, month, day, 0, 0, 0, 0, time.UTC)

	ss := newTestSentinel([]uint64{1})

	// The insert must be verified: the assertion below expects zero, so a
	// failed insert would otherwise let the test pass without testing anything.
	require.NoError(t, DB.Create(&model.ServiceHistory{
		ServiceID: 999,
		ServerID:  0,
		AvgDelay:  10.0,
		Up:        5,
		Down:      1,
		CreatedAt: today.Add(-25 * time.Hour),
	}).Error)

	ss.loadMonthlyStatusFromDB(today)

	ms := ss.monthlyStatus[uint64(1)]
	assert.Equal(t, uint64(0), ms.TotalUp)
}
// TestLoadTodayStatsFromDB inserts two ServiceHistory rows dated after today's
// midnight and checks that loadTodayStats sums them into serviceStatusToday
// and mirrors the counts into the monthly totals.
func TestLoadTodayStatsFromDB(t *testing.T) {
	cleanup := setupTestDB(t)
	defer cleanup()

	year, month, day := time.Now().Date()
	today := time.Date(year, month, day, 0, 0, 0, 0, time.UTC)

	serviceID := uint64(1)
	ss := newTestSentinel([]uint64{serviceID})

	fixtures := []model.ServiceHistory{
		{ServiceID: serviceID, ServerID: 0, AvgDelay: 10.0, Up: 5, Down: 1, CreatedAt: today.Add(1 * time.Hour)},
		{ServiceID: serviceID, ServerID: 0, AvgDelay: 30.0, Up: 3, Down: 2, CreatedAt: today.Add(2 * time.Hour)},
	}
	for i := range fixtures {
		// Fail at the point of insertion rather than on a later assertion.
		require.NoError(t, DB.Create(&fixtures[i]).Error)
	}

	ss.loadTodayStats(today)

	// Up 5+3, Down 1+2, delays 10 and 30 average to 20.
	st := ss.serviceStatusToday[serviceID]
	assert.Equal(t, uint64(8), st.Up)
	assert.Equal(t, uint64(3), st.Down)
	assert.InDelta(t, 20.0, st.Delay, 0.01)

	// The monthly totals are expected to carry the same counts.
	ms := ss.monthlyStatus[serviceID]
	assert.Equal(t, uint64(8), ms.TotalUp)
	assert.Equal(t, uint64(3), ms.TotalDown)
}
// TestLoadMonthlyStatusFromTSDB writes five successful and three failed
// service samples dated roughly 25 hours before today's midnight, then checks
// that loadMonthlyStatusFromTSDB reports them in slot 28 and in the totals
// while leaving today's slot (29) empty.
func TestLoadMonthlyStatusFromTSDB(t *testing.T) {
	db, cleanup := setupTestTSDB(t)
	defer cleanup()

	y, m, d := time.Now().Date()
	today := time.Date(y, m, d, 0, 0, 0, 0, time.UTC)

	serviceID := uint64(1)
	services := []*model.Service{{Common: model.Common{ID: serviceID}}}
	ss := newTestSentinel([]uint64{serviceID})

	base := today.Add(-25 * time.Hour)

	// record writes a single sample `minutes` after base.
	record := func(minutes int, delay float64, ok bool) {
		sample := &tsdb.ServiceMetrics{
			ServiceID:  serviceID,
			ServerID:   1,
			Timestamp:  base.Add(time.Duration(minutes) * time.Minute),
			Delay:      delay,
			Successful: ok,
		}
		require.NoError(t, db.WriteServiceMetrics(sample))
	}

	// Five successes at minutes 0..4 with delays 10..14.
	for n := 0; n < 5; n++ {
		record(n, float64(10+n), true)
	}
	// Three failures at minutes 10..12 with delays 20..22.
	for n := 0; n < 3; n++ {
		record(n+10, float64(20+n), false)
	}
	db.Flush()

	ss.loadMonthlyStatusFromTSDB(services, today)

	ms := ss.monthlyStatus[serviceID]

	// The samples are expected in slot 28.
	assert.Equal(t, uint64(5), ms.Up[28])
	assert.Equal(t, uint64(3), ms.Down[28])
	assert.Equal(t, uint64(5), ms.TotalUp)
	assert.Equal(t, uint64(3), ms.TotalDown)
	assert.Greater(t, ms.Delay[28], float64(0))

	// Today's slot (index 29) should be untouched.
	assert.Equal(t, uint64(0), ms.Up[29])
}
// TestLoadTodayStatsFromTSDB writes recent successful and failed service
// samples, then checks that loadTodayStats reports non-zero Up/Down counts for
// today and mirrors them into the monthly totals.
//
// "today" is derived from the current UTC date. Taking the calendar date from
// the local clock and placing it in UTC (as this test used to) puts midnight
// in the future whenever the local zone is ahead of UTC and the UTC date has
// not rolled over yet, so samples written at "now" would not belong to today.
func TestLoadTodayStatsFromTSDB(t *testing.T) {
	now := time.Now().UTC()
	year, month, day := now.Date()
	today := time.Date(year, month, day, 0, 0, 0, 0, time.UTC)

	// Samples are written up to 11 minutes in the past; right after UTC
	// midnight some of them would be dated yesterday and the assertions
	// below could not hold.
	if now.Sub(today) < 15*time.Minute {
		t.Skip("too close to UTC midnight: samples would be dated on the previous day")
	}

	db, cleanup := setupTestTSDB(t)
	defer cleanup()

	serviceID := uint64(1)
	ss := newTestSentinel([]uint64{serviceID})

	// Four successes at now, now-1m, now-2m, now-3m.
	for i := 0; i < 4; i++ {
		ts := now.Add(-time.Duration(i) * time.Minute)
		require.NoError(t, db.WriteServiceMetrics(&tsdb.ServiceMetrics{
			ServiceID:  serviceID,
			ServerID:   1,
			Timestamp:  ts,
			Delay:      float64(10 + i),
			Successful: true,
		}))
	}
	// Two failures at now-10m and now-11m.
	for i := 0; i < 2; i++ {
		ts := now.Add(-time.Duration(i+10) * time.Minute)
		require.NoError(t, db.WriteServiceMetrics(&tsdb.ServiceMetrics{
			ServiceID:  serviceID,
			ServerID:   1,
			Timestamp:  ts,
			Delay:      0,
			Successful: false,
		}))
	}
	db.Flush()

	ss.loadTodayStats(today)

	st := ss.serviceStatusToday[serviceID]
	assert.Greater(t, st.Up, uint64(0))
	assert.Greater(t, st.Down, uint64(0))

	// The monthly totals are expected to match today's counts.
	ms := ss.monthlyStatus[serviceID]
	assert.Equal(t, st.Up, ms.TotalUp)
	assert.Equal(t, st.Down, ms.TotalDown)
}
// TestLoadMonthlyStatusFromTSDB_NoDoubleCountToday writes samples dated "now"
// and checks that running loadMonthlyStatusFromTSDB followed by loadTodayStats
// adds today's Up count to TotalUp exactly once.
//
// "today" is derived from the current UTC date so that the freshly written
// samples are not dated before it. With the calendar date taken from the local
// clock, midnight could lie in the future and the test would pass without
// exercising the scenario it is named after.
func TestLoadMonthlyStatusFromTSDB_NoDoubleCountToday(t *testing.T) {
	db, cleanup := setupTestTSDB(t)
	defer cleanup()

	now := time.Now().UTC()
	year, month, day := now.Date()
	today := time.Date(year, month, day, 0, 0, 0, 0, time.UTC)

	serviceID := uint64(1)
	services := []*model.Service{{Common: model.Common{ID: serviceID}}}
	ss := newTestSentinel([]uint64{serviceID})

	// Five successes at now, now-1m, ..., now-4m.
	for i := 0; i < 5; i++ {
		require.NoError(t, db.WriteServiceMetrics(&tsdb.ServiceMetrics{
			ServiceID:  serviceID,
			ServerID:   1,
			Timestamp:  now.Add(-time.Duration(i) * time.Minute),
			Delay:      10.0,
			Successful: true,
		}))
	}
	db.Flush()

	ss.loadMonthlyStatusFromTSDB(services, today)
	totalAfterMonthly := ss.monthlyStatus[serviceID].TotalUp

	ss.loadTodayStats(today)
	totalAfterToday := ss.monthlyStatus[serviceID].TotalUp

	// Whatever the monthly load counted, the today load may add only its own
	// Up count on top of it.
	assert.Equal(t, totalAfterMonthly+ss.serviceStatusToday[serviceID].Up, totalAfterToday)
}