feat(v2.0.0): tsdb (#1162)

* feat: tsdb

* fix(ci): remove --parseGoList=false from swag init to fix dependency resolution

* fix(ci): fix swag init directory and temporarily remove s390x support due to cgo issues

* fix(ci): fix swag init output directory to cmd/dashboard/docs

* fix(ci): set GOTOOLCHAIN=auto for gosec

* feat: add system storage maintenance for SQLite and TSDB

* shit

* feat: add s390x support and improve service monitoring

* ci: upgrade goreleaser-cross image to v1.25

* ci: add libzstd-dev:s390x for cross-compilation

* ci: build libzstd for s390x from source

* ci: add libzstd_linux_s390x.go for gozstd linking

* ci: use vendor mode for s390x gozstd build

* ci: clone zstd source for s390x build

* refactor(tsdb): rename MaxDiskUsageGB to MinFreeDiskSpaceGB and optimize queries

- Rename config to accurately reflect VictoriaMetrics behavior: minimum free disk space threshold
- Add QueryServiceHistoryByServerID for batch query optimization
- Fix hasStatus to avoid false status counting when only delay data exists
- Fix service aggregation boundary: use successCount*2 >= count
- Fix serviceID parsing with strconv.ParseUint error handling
- Add TagFiltersCacheSize for better query performance

* feat(api): add server metrics endpoint and simplify service history response

- Add /server/:id/metrics API for querying TSDB server metrics
- Simplify getServiceHistory by removing redundant data conversion
- Change AvgDelay type from float32 to float64
- Remove generated swagger docs (to be regenerated)
- Update TSDB query, writer and tests

* chore: 临时禁用不支持前端

* ci: cache zstd build for s390x to speed up CI

* fix(tsdb): fix race conditions, data correctness and optimize performance

- Fix TOCTOU race between IsClosed() and write/query by holding RLock
- Fix delay=0 excluded from stats by using hasDelay flag instead of value > 0
- Fix fmt.Sscanf -> strconv.ParseUint for server_id parsing with error logging
- Fix buffer unbounded growth by flushing inside lock when over maxSize
- Split makeMetricRow into makeServerMetricRow/makeServiceMetricRow
- Extract InitGlobalSettings() from Open() for VictoriaMetrics globals
- Remove redundant instance/GetInstance/SetInstance singleton
- Add error logging for silently skipped block decode errors
- Optimize WriteBatch* to build all rows in single write call
- Optimize downsample to use linear scan instead of map for sorted data
- Optimize query slice reuse across block iterations

* 服务添加DisplayIndex (#1166)

* 服务添加DisplayIndex

* 根据ai建议修改

---------

Co-authored-by: huYang <306061454@qq.com>

* fix(tsdb): restore SQLite fallback and monthly status reload on restart

- Restore ServiceHistory model and SQLite write fallback when TSDB is disabled
- Reload monthlyStatus (30-day) and serviceStatusToday from TSDB/SQLite on startup
- Add SQLite fallback query for /service/:id/history and /server/:id/service
- Remove breaking GET /service/:id endpoint, keep /service/:id/history only
- Add QueryServiceDailyStats to TSDB for per-day aggregation
- Add tests for monthly status and today stats loading from both TSDB and SQLite
- Migrate ServiceHistory table only when TSDB is disabled

* ci: exclude false-positive gosec rules G117, G703, G704

* feat(api): expose tsdb_enabled in setting response

* ci: restore G115 exclusion accidentally dropped in previous commit

* fix: update version numbers for OfficialAdmin and Official templates

* chore: upgrade frontend

* chore: upgrade frontend

---------

Co-authored-by: 胡说丷刂 <34758853+laosan-xx@users.noreply.github.com>
Co-authored-by: huYang <306061454@qq.com>
This commit is contained in:
奶爸
2026-02-15 13:13:33 +08:00
committed by GitHub
parent 4c4758207d
commit e61772e858
28 changed files with 3054 additions and 221 deletions
+87
View File
@@ -12,6 +12,7 @@ import (
"gorm.io/gorm"
"github.com/nezhahq/nezha/model"
"github.com/nezhahq/nezha/pkg/tsdb"
pb "github.com/nezhahq/nezha/proto"
"github.com/nezhahq/nezha/service/singleton"
)
@@ -366,3 +367,89 @@ func batchMoveServer(c *gin.Context) (any, error) {
return nil, nil
}
// serverMetricMap translates the metric names accepted by the "metric" query
// parameter of the server metrics endpoint into their tsdb.MetricType values.
// Names that are not keys of this map are not valid metric names.
var serverMetricMap = map[string]tsdb.MetricType{
	// CPU, memory, swap and disk.
	"cpu":    tsdb.MetricServerCPU,
	"memory": tsdb.MetricServerMemory,
	"swap":   tsdb.MetricServerSwap,
	"disk":   tsdb.MetricServerDisk,

	// Network speed and transfer, per direction.
	"net_in_speed":     tsdb.MetricServerNetInSpeed,
	"net_out_speed":    tsdb.MetricServerNetOutSpeed,
	"net_in_transfer":  tsdb.MetricServerNetInTransfer,
	"net_out_transfer": tsdb.MetricServerNetOutTransfer,

	// Load averages.
	"load1":  tsdb.MetricServerLoad1,
	"load5":  tsdb.MetricServerLoad5,
	"load15": tsdb.MetricServerLoad15,

	// Connection and process counts.
	"tcp_conn":      tsdb.MetricServerTCPConn,
	"udp_conn":      tsdb.MetricServerUDPConn,
	"process_count": tsdb.MetricServerProcessCount,

	// Temperature, uptime and GPU.
	"temperature": tsdb.MetricServerTemperature,
	"uptime":      tsdb.MetricServerUptime,
	"gpu":         tsdb.MetricServerGPU,
}
// getServerMetrics returns the stored history of one metric for one server.
//
// The server ID is read from the "id" path parameter, the metric name from the
// "metric" query parameter (it must be a key of serverMetricMap) and the time
// range from the optional "period" query parameter, which defaults to "1d".
// A request without an authorized user in the context is treated as a guest:
// guests cannot read servers marked HideForGuest and may only request the 1d
// period. When TSDB is disabled the response carries an empty data point list.
//
// Get server metrics history
// @Summary Get server metrics history
// @Security BearerAuth
// @Schemes
// @Description Get server metrics history for a specific server
// @Tags common
// @param id path uint true "Server ID"
// @param metric query string true "Metric name: cpu, memory, swap, disk, net_in_speed, net_out_speed, net_in_transfer, net_out_transfer, load1, load5, load15, tcp_conn, udp_conn, process_count, temperature, uptime, gpu"
// @param period query string false "Time period: 1d, 7d, 30d (default: 1d)"
// @Produce json
// @Success 200 {object} model.CommonResponse[model.ServerMetricsResponse]
// @Router /server/{id}/metrics [get]
func getServerMetrics(c *gin.Context) (*model.ServerMetricsResponse, error) {
	serverID, err := strconv.ParseUint(c.Param("id"), 10, 64)
	if err != nil {
		return nil, err
	}

	server, ok := singleton.ServerShared.Get(serverID)
	if !ok {
		return nil, singleton.Localizer.ErrorT("server not found")
	}

	// A request is a guest request when no authorized user is stored in the
	// context; hidden servers are not readable by guests.
	_, isMember := c.Get(model.CtxKeyAuthorizedUser)
	if server.HideForGuest && !isMember {
		return nil, singleton.Localizer.ErrorT("unauthorized")
	}

	metricName := c.Query("metric")
	metricType, ok := serverMetricMap[metricName]
	if !ok {
		return nil, singleton.Localizer.ErrorT("invalid metric name")
	}

	period, err := tsdb.ParseQueryPeriod(c.DefaultQuery("period", "1d"))
	if err != nil {
		return nil, err
	}
	// Guests are limited to the one-day period.
	if !isMember && period != tsdb.Period1Day {
		return nil, singleton.Localizer.ErrorT("unauthorized: only 1d data available for guests")
	}

	response := &model.ServerMetricsResponse{
		ServerID:   serverID,
		ServerName: server.Name,
		Metric:     metricName,
		DataPoints: make([]model.ServerMetricsDataPoint, 0),
	}
	if !singleton.TSDBEnabled() {
		return response, nil
	}

	points, err := singleton.TSDBShared.QueryServerMetrics(serverID, metricType, period)
	if err != nil {
		return nil, err
	}
	// Keep the pre-allocated empty slice when the query yields no points, so
	// the field is serialized as an empty array rather than null, matching the
	// TSDB-disabled path above.
	if len(points) > 0 {
		response.DataPoints = points
	}
	return response, nil
}