mirror of https://github.com/Buriburizaem0n/nezha_domains.git
synced 2026-05-06 05:38:50 +00:00
feat(v2.0.0): tsdb (#1162)
* feat: tsdb
* fix(ci): remove --parseGoList=false from swag init to fix dependency resolution
* fix(ci): fix swag init directory and temporarily remove s390x support due to cgo issues
* fix(ci): fix swag init output directory to cmd/dashboard/docs
* fix(ci): set GOTOOLCHAIN=auto for gosec
* feat: add system storage maintenance for SQLite and TSDB
* shit
* feat: add s390x support and improve service monitoring
* ci: upgrade goreleaser-cross image to v1.25
* ci: add libzstd-dev:s390x for cross-compilation
* ci: build libzstd for s390x from source
* ci: add libzstd_linux_s390x.go for gozstd linking
* ci: use vendor mode for s390x gozstd build
* ci: clone zstd source for s390x build
* refactor(tsdb): rename MaxDiskUsageGB to MinFreeDiskSpaceGB and optimize queries
  - Rename config to accurately reflect VictoriaMetrics behavior: minimum free disk space threshold
  - Add QueryServiceHistoryByServerID for batch query optimization
  - Fix hasStatus to avoid false status counting when only delay data exists
  - Fix service aggregation boundary: use successCount*2 >= count
  - Fix serviceID parsing with strconv.ParseUint error handling
  - Add TagFiltersCacheSize for better query performance
* feat(api): add server metrics endpoint and simplify service history response
  - Add /server/:id/metrics API for querying TSDB server metrics
  - Simplify getServiceHistory by removing redundant data conversion
  - Change AvgDelay type from float32 to float64
  - Remove generated swagger docs (to be regenerated)
  - Update TSDB query, writer and tests
* chore: temporarily disable unsupported frontend
* ci: cache zstd build for s390x to speed up CI
* fix(tsdb): fix race conditions, data correctness and optimize performance
  - Fix TOCTOU race between IsClosed() and write/query by holding RLock
  - Fix delay=0 excluded from stats by using hasDelay flag instead of value > 0
  - Fix fmt.Sscanf -> strconv.ParseUint for server_id parsing with error logging
  - Fix buffer unbounded growth by flushing inside lock when over maxSize
  - Split makeMetricRow into makeServerMetricRow/makeServiceMetricRow
  - Extract InitGlobalSettings() from Open() for VictoriaMetrics globals
  - Remove redundant instance/GetInstance/SetInstance singleton
  - Add error logging for silently skipped block decode errors
  - Optimize WriteBatch* to build all rows in single write call
  - Optimize downsample to use linear scan instead of map for sorted data
  - Optimize query slice reuse across block iterations
* Add DisplayIndex to services (#1166)
  - Add DisplayIndex to services
  - Revise per AI suggestions
  Co-authored-by: huYang <306061454@qq.com>
* fix(tsdb): restore SQLite fallback and monthly status reload on restart
  - Restore ServiceHistory model and SQLite write fallback when TSDB is disabled
  - Reload monthlyStatus (30-day) and serviceStatusToday from TSDB/SQLite on startup
  - Add SQLite fallback query for /service/:id/history and /server/:id/service
  - Remove breaking GET /service/:id endpoint, keep /service/:id/history only
  - Add QueryServiceDailyStats to TSDB for per-day aggregation
  - Add tests for monthly status and today stats loading from both TSDB and SQLite
  - Migrate ServiceHistory table only when TSDB is disabled
* ci: exclude false-positive gosec rules G117, G703, G704
* feat(api): expose tsdb_enabled in setting response
* ci: restore G115 exclusion accidentally dropped in previous commit
* fix: update version numbers for OfficialAdmin and Official templates
* chore: upgrade frontend
* chore: upgrade frontend

Co-authored-by: 胡说丷刂 <34758853+laosan-xx@users.noreply.github.com>
Co-authored-by: huYang <306061454@qq.com>
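A minimal sketch of the aggregation-boundary fix listed above (the names here are hypothetical, not taken from the diff): a window counts as "up" when at least half of its checks succeeded, and writing the test as successCount*2 >= count keeps everything in integer arithmetic instead of comparing successCount/count against 0.5 with float rounding.

package main

import "fmt"

// isUp reports whether at least half of count checks succeeded.
// Integer form of successCount/count >= 0.5, with no floating-point division.
func isUp(successCount, count uint64) bool {
    return successCount*2 >= count
}

func main() {
    fmt.Println(isUp(1, 2)) // true: exactly half is counted as up
    fmt.Println(isUp(1, 3)) // false: fewer than half succeeded
}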
@@ -0,0 +1,664 @@
package tsdb

import (
    "fmt"
    "log"
    "sort"
    "strconv"
    "time"

    "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"

    "github.com/nezhahq/nezha/model"
)

// QueryPeriod is a query time range.
type QueryPeriod string

const (
    Period1Day   QueryPeriod = "1d"
    Period7Days  QueryPeriod = "7d"
    Period30Days QueryPeriod = "30d"
)

// ParseQueryPeriod parses a query period string.
func ParseQueryPeriod(s string) (QueryPeriod, error) {
    switch s {
    case "1d", "":
        return Period1Day, nil
    case "7d":
        return Period7Days, nil
    case "30d":
        return Period30Days, nil
    default:
        return "", fmt.Errorf("invalid period: %s, expected 1d, 7d, or 30d", s)
    }
}

// Duration returns the length of the period.
func (p QueryPeriod) Duration() time.Duration {
    switch p {
    case Period7Days:
        return 7 * 24 * time.Hour
    case Period30Days:
        return 30 * 24 * time.Hour
    default:
        return 24 * time.Hour
    }
}

// DownsampleInterval returns the downsampling interval:
// 1d: one point every 5 minutes (288 points)
// 7d: one point every 30 minutes (336 points)
// 30d: one point every 2 hours (360 points)
func (p QueryPeriod) DownsampleInterval() time.Duration {
    switch p {
    case Period7Days:
        return 30 * time.Minute
    case Period30Days:
        return 2 * time.Hour
    default:
        return 5 * time.Minute
    }
}
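// Illustrative sketch (added for this writeup, not part of the original
// commit): parsing a period from an HTTP query parameter and deriving how
// many downsampled buckets to expect, matching the counts noted above.
func examplePeriodBuckets(raw string) (int, error) {
    p, err := ParseQueryPeriod(raw) // "" falls back to 1d
    if err != nil {
        return 0, err
    }
    return int(p.Duration() / p.DownsampleInterval()), nil // e.g. 1d/5m = 288
}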
// Type aliases for model types used in tsdb package
type (
    DataPoint             = model.DataPoint
    ServiceHistorySummary = model.ServiceHistorySummary
    ServerServiceStats    = model.ServerServiceStats
    ServiceHistoryResult  = model.ServiceHistoryResponse
    MetricDataPoint       = model.ServerMetricsDataPoint
)

type rawDataPoint struct {
    timestamp int64
    value     float64
    status    float64
    hasDelay  bool
    hasStatus bool
}

func (db *TSDB) QueryServiceHistory(serviceID uint64, period QueryPeriod) (*ServiceHistoryResult, error) {
    db.mu.RLock()
    defer db.mu.RUnlock()
    if db.closed {
        return nil, fmt.Errorf("TSDB is closed")
    }

    now := time.Now()
    tr := storage.TimeRange{
        MinTimestamp: now.Add(-period.Duration()).UnixMilli(),
        MaxTimestamp: now.UnixMilli(),
    }

    serviceIDStr := strconv.FormatUint(serviceID, 10)

    delayData, err := db.queryMetricByServiceID(MetricServiceDelay, serviceIDStr, tr)
    if err != nil {
        return nil, fmt.Errorf("failed to query delay data: %w", err)
    }

    statusData, err := db.queryMetricByServiceID(MetricServiceStatus, serviceIDStr, tr)
    if err != nil {
        return nil, fmt.Errorf("failed to query status data: %w", err)
    }

    result := &ServiceHistoryResult{
        ServiceID: serviceID,
        Servers:   make([]ServerServiceStats, 0),
    }

    serverDataMap := make(map[uint64]map[int64]*rawDataPoint)

    for serverID, points := range delayData {
        if serverDataMap[serverID] == nil {
            serverDataMap[serverID] = make(map[int64]*rawDataPoint)
        }
        for _, p := range points {
            serverDataMap[serverID][p.timestamp] = &rawDataPoint{
                timestamp: p.timestamp,
                value:     p.value,
                hasDelay:  true,
            }
        }
    }

    for serverID, points := range statusData {
        if serverDataMap[serverID] == nil {
            serverDataMap[serverID] = make(map[int64]*rawDataPoint)
        }
        for _, p := range points {
            if existing, ok := serverDataMap[serverID][p.timestamp]; ok {
                existing.status = p.value
                existing.hasStatus = true
            } else {
                serverDataMap[serverID][p.timestamp] = &rawDataPoint{
                    timestamp: p.timestamp,
                    status:    p.value,
                    hasStatus: true,
                }
            }
        }
    }

    for serverID, pointsMap := range serverDataMap {
        points := make([]rawDataPoint, 0, len(pointsMap))
        for _, p := range pointsMap {
            points = append(points, *p)
        }
        stats := calculateStats(points, period.DownsampleInterval())
        result.Servers = append(result.Servers, ServerServiceStats{
            ServerID: serverID,
            Stats:    stats,
        })
    }

    sort.Slice(result.Servers, func(i, j int) bool {
        return result.Servers[i].ServerID < result.Servers[j].ServerID
    })

    return result, nil
}
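// Usage sketch (illustrative, not from the diff): fetch a week of history for
// one service and log per-server uptime. Assumes a *TSDB opened elsewhere in
// this package (the commit notes mention an Open function).
func exampleQueryServiceHistory(db *TSDB, serviceID uint64) {
    res, err := db.QueryServiceHistory(serviceID, Period7Days)
    if err != nil {
        log.Printf("query failed: %v", err)
        return
    }
    for _, s := range res.Servers {
        log.Printf("server %d: up %.2f%%, avg delay %.1f ms",
            s.ServerID, s.Stats.UpPercent, s.Stats.AvgDelay)
    }
}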
type DailyServiceStats struct {
    Up    uint64
    Down  uint64
    Delay float64
}

func (db *TSDB) QueryServiceDailyStats(serviceID uint64, today time.Time, days int) ([]DailyServiceStats, error) {
    db.mu.RLock()
    defer db.mu.RUnlock()
    if db.closed {
        return nil, fmt.Errorf("TSDB is closed")
    }

    stats := make([]DailyServiceStats, days)
    serviceIDStr := strconv.FormatUint(serviceID, 10)

    start := today.AddDate(0, 0, -(days - 1))
    tr := storage.TimeRange{
        MinTimestamp: start.UnixMilli(),
        MaxTimestamp: today.UnixMilli(),
    }

    statusData, err := db.queryMetricByServiceID(MetricServiceStatus, serviceIDStr, tr)
    if err != nil {
        return nil, err
    }
    delayData, err := db.queryMetricByServiceID(MetricServiceDelay, serviceIDStr, tr)
    if err != nil {
        return nil, err
    }

    for _, points := range statusData {
        for _, p := range points {
            ts := time.UnixMilli(p.timestamp)
            dayIndex := (days - 1) - int(today.Sub(ts).Hours())/24
            if dayIndex < 0 || dayIndex >= days {
                continue
            }
            if p.value >= 0.5 {
                stats[dayIndex].Up++
            } else {
                stats[dayIndex].Down++
            }
        }
    }

    delayCount := make([]int, days)
    for _, points := range delayData {
        for _, p := range points {
            ts := time.UnixMilli(p.timestamp)
            dayIndex := (days - 1) - int(today.Sub(ts).Hours())/24
            if dayIndex < 0 || dayIndex >= days {
                continue
            }
            stats[dayIndex].Delay = (stats[dayIndex].Delay*float64(delayCount[dayIndex]) + p.value) / float64(delayCount[dayIndex]+1)
            delayCount[dayIndex]++
        }
    }

    return stats, nil
}
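// The per-day delay above is maintained as a running mean:
// new = (old*n + x) / (n+1). Self-contained check (illustrative, not part of
// the commit) that the update reproduces the plain arithmetic mean:
func exampleRunningMean(xs []float64) float64 {
    var mean float64
    for n, x := range xs {
        mean = (mean*float64(n) + x) / float64(n+1)
    }
    return mean // equals the arithmetic mean of xs
}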
type metricPoint struct {
    timestamp int64
    value     float64
}

func (db *TSDB) queryMetricByServiceID(metric MetricType, serviceID string, tr storage.TimeRange) (map[uint64][]metricPoint, error) {
    tfs := storage.NewTagFilters()
    if err := tfs.Add(nil, []byte(metric), false, false); err != nil {
        return nil, err
    }
    if err := tfs.Add([]byte("service_id"), []byte(serviceID), false, false); err != nil {
        return nil, err
    }

    deadline := uint64(time.Now().Add(30 * time.Second).Unix())

    var search storage.Search
    search.Init(nil, db.storage, []*storage.TagFilters{tfs}, tr, 100000, deadline)
    defer search.MustClose()

    result := make(map[uint64][]metricPoint)
    var timestamps []int64
    var values []float64

    for search.NextMetricBlock() {
        mbr := search.MetricBlockRef
        var block storage.Block
        mbr.BlockRef.MustReadBlock(&block)

        mn := storage.GetMetricName()
        if err := mn.Unmarshal(mbr.MetricName); err != nil {
            log.Printf("NEZHA>> TSDB: failed to unmarshal metric name: %v", err)
            storage.PutMetricName(mn)
            continue
        }

        serverIDBytes := mn.GetTagValue("server_id")
        if len(serverIDBytes) == 0 {
            storage.PutMetricName(mn)
            continue
        }

        serverID, err := strconv.ParseUint(string(serverIDBytes), 10, 64)
        if err != nil {
            log.Printf("NEZHA>> TSDB: failed to parse server_id %q: %v", string(serverIDBytes), err)
            storage.PutMetricName(mn)
            continue
        }
        storage.PutMetricName(mn)

        if err := block.UnmarshalData(); err != nil {
            log.Printf("NEZHA>> TSDB: failed to unmarshal block data: %v", err)
            continue
        }

        timestamps = timestamps[:0]
        values = values[:0]
        timestamps, values = block.AppendRowsWithTimeRangeFilter(timestamps, values, tr)

        for i := range timestamps {
            result[serverID] = append(result[serverID], metricPoint{
                timestamp: timestamps[i],
                value:     values[i],
            })
        }
    }

    if err := search.Error(); err != nil {
        return nil, err
    }

    return result, nil
}
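// Note (added for clarity, not in the original commit): in the
// VictoriaMetrics storage API an empty tag key addresses the metric name
// itself, so the two filters above behave roughly like the selector
// {__name__="<metric>", service_id="<serviceID>"} in PromQL terms.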
func calculateStats(points []rawDataPoint, downsampleInterval time.Duration) ServiceHistorySummary {
    if len(points) == 0 {
        return ServiceHistorySummary{}
    }

    sort.Slice(points, func(i, j int) bool {
        return points[i].timestamp < points[j].timestamp
    })

    var totalDelay float64
    var delayCount int
    var totalUp, totalDown uint64

    for _, p := range points {
        if p.hasDelay {
            totalDelay += p.value
            delayCount++
        }
        if p.hasStatus {
            if p.status >= 0.5 {
                totalUp++
            } else {
                totalDown++
            }
        }
    }

    summary := ServiceHistorySummary{
        TotalUp:   totalUp,
        TotalDown: totalDown,
    }

    if delayCount > 0 {
        summary.AvgDelay = totalDelay / float64(delayCount)
    }

    if totalUp+totalDown > 0 {
        summary.UpPercent = float32(totalUp) / float32(totalUp+totalDown) * 100
    }

    summary.DataPoints = downsample(points, downsampleInterval)

    return summary
}
func downsample(points []rawDataPoint, interval time.Duration) []DataPoint {
    if len(points) == 0 {
        return nil
    }

    intervalMs := interval.Milliseconds()
    result := make([]DataPoint, 0)

    // points are already sorted; bucket them with a linear scan
    bucketStart := (points[0].timestamp / intervalMs) * intervalMs
    var totalDelay float64
    var delayCount, upCount, statusCount int

    flushBucket := func() {
        var avgDelay float64
        if delayCount > 0 {
            avgDelay = totalDelay / float64(delayCount)
        }
        var status uint8
        if statusCount > 0 && upCount > statusCount/2 {
            status = 1
        }
        result = append(result, DataPoint{
            Timestamp: bucketStart,
            Delay:     avgDelay,
            Status:    status,
        })
    }

    for _, p := range points {
        key := (p.timestamp / intervalMs) * intervalMs
        if key != bucketStart {
            flushBucket()
            bucketStart = key
            totalDelay = 0
            delayCount = 0
            upCount = 0
            statusCount = 0
        }
        if p.hasDelay {
            totalDelay += p.value
            delayCount++
        }
        if p.hasStatus {
            statusCount++
            if p.status >= 0.5 {
                upCount++
            }
        }
    }
    flushBucket()

    return result
}
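// Worked example (illustrative, not part of the commit): with a 5-minute
// interval, points at 00:01, 00:03 and 00:07 land in buckets starting at
// 00:00 and 00:05. The first bucket averages delay to 15 and, with only 1 of
// 2 status samples up, fails the strict-majority test, so Status stays 0.
func exampleDownsample() []DataPoint {
    base := time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli()
    pts := []rawDataPoint{
        {timestamp: base + 1*60000, value: 10, hasDelay: true, status: 1, hasStatus: true},
        {timestamp: base + 3*60000, value: 20, hasDelay: true, status: 0, hasStatus: true},
        {timestamp: base + 7*60000, value: 30, hasDelay: true, status: 1, hasStatus: true},
    }
    return downsample(pts, 5*time.Minute) // two points: delay 15/status 0, delay 30/status 1
}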
func downsampleMetrics(points []rawDataPoint, interval time.Duration, useLastValue bool) []MetricDataPoint {
    if len(points) == 0 {
        return nil
    }

    sort.Slice(points, func(i, j int) bool {
        return points[i].timestamp < points[j].timestamp
    })

    intervalMs := interval.Milliseconds()
    result := make([]MetricDataPoint, 0)

    bucketStart := (points[0].timestamp / intervalMs) * intervalMs
    var total float64
    var count int
    var last rawDataPoint

    flushBucket := func() {
        var value float64
        if useLastValue {
            value = last.value
        } else if count > 0 {
            value = total / float64(count)
        }
        result = append(result, MetricDataPoint{
            Timestamp: bucketStart,
            Value:     value,
        })
    }

    for _, p := range points {
        key := (p.timestamp / intervalMs) * intervalMs
        if key != bucketStart {
            flushBucket()
            bucketStart = key
            total = 0
            count = 0
        }
        total += p.value
        count++
        last = p
    }
    flushBucket()

    return result
}

// isCumulativeMetric reports whether a metric is cumulative (monotonically increasing).
func isCumulativeMetric(metric MetricType) bool {
    switch metric {
    case MetricServerNetInTransfer, MetricServerNetOutTransfer, MetricServerUptime:
        return true
    default:
        return false
    }
}
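// For cumulative counters (network transfer, uptime) averaging inside a
// bucket would understate the running total, which is why downsampleMetrics
// keeps the last sample instead. Illustrative sketch (not from the diff) of
// turning two consecutive last-value points into a per-second rate:
func exampleCounterRate(prev, cur MetricDataPoint) float64 {
    dt := float64(cur.Timestamp-prev.Timestamp) / 1000 // timestamps are in ms
    if dt <= 0 || cur.Value < prev.Value {
        return 0 // counter reset or out-of-order points
    }
    return (cur.Value - prev.Value) / dt
}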
func (db *TSDB) QueryServerMetrics(serverID uint64, metric MetricType, period QueryPeriod) ([]MetricDataPoint, error) {
    db.mu.RLock()
    defer db.mu.RUnlock()
    if db.closed {
        return nil, fmt.Errorf("TSDB is closed")
    }

    now := time.Now()
    tr := storage.TimeRange{
        MinTimestamp: now.Add(-period.Duration()).UnixMilli(),
        MaxTimestamp: now.UnixMilli(),
    }

    serverIDStr := strconv.FormatUint(serverID, 10)

    tfs := storage.NewTagFilters()
    if err := tfs.Add(nil, []byte(metric), false, false); err != nil {
        return nil, err
    }
    if err := tfs.Add([]byte("server_id"), []byte(serverIDStr), false, false); err != nil {
        return nil, err
    }

    deadline := uint64(time.Now().Add(30 * time.Second).Unix())

    var search storage.Search
    search.Init(nil, db.storage, []*storage.TagFilters{tfs}, tr, 100000, deadline)
    defer search.MustClose()

    var points []rawDataPoint
    var timestamps []int64
    var values []float64

    for search.NextMetricBlock() {
        mbr := search.MetricBlockRef
        var block storage.Block
        mbr.BlockRef.MustReadBlock(&block)

        if err := block.UnmarshalData(); err != nil {
            log.Printf("NEZHA>> TSDB: failed to unmarshal block data: %v", err)
            continue
        }

        timestamps = timestamps[:0]
        values = values[:0]
        timestamps, values = block.AppendRowsWithTimeRangeFilter(timestamps, values, tr)

        for i := range timestamps {
            points = append(points, rawDataPoint{
                timestamp: timestamps[i],
                value:     values[i],
            })
        }
    }

    if err := search.Error(); err != nil {
        return nil, err
    }

    return downsampleMetrics(points, period.DownsampleInterval(), isCumulativeMetric(metric)), nil
}
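// Usage sketch (illustrative, not from the diff): the /server/:id/metrics
// endpoint mentioned in the commit message maps onto a call like this;
// MetricServerNetInTransfer is one of the MetricType constants referenced
// above in isCumulativeMetric.
func exampleQueryServerMetrics(db *TSDB, serverID uint64) {
    pts, err := db.QueryServerMetrics(serverID, MetricServerNetInTransfer, Period1Day)
    if err != nil {
        log.Printf("query failed: %v", err)
        return
    }
    log.Printf("server %d: %d downsampled points", serverID, len(pts))
}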
func (db *TSDB) QueryServiceHistoryByServerID(serverID uint64, period QueryPeriod) (map[uint64]*ServiceHistoryResult, error) {
    db.mu.RLock()
    defer db.mu.RUnlock()
    if db.closed {
        return nil, fmt.Errorf("TSDB is closed")
    }

    now := time.Now()
    tr := storage.TimeRange{
        MinTimestamp: now.Add(-period.Duration()).UnixMilli(),
        MaxTimestamp: now.UnixMilli(),
    }

    serverIDStr := strconv.FormatUint(serverID, 10)

    delayData, err := db.queryMetricByServerID(MetricServiceDelay, serverIDStr, tr)
    if err != nil {
        return nil, fmt.Errorf("failed to query delay data: %w", err)
    }

    statusData, err := db.queryMetricByServerID(MetricServiceStatus, serverIDStr, tr)
    if err != nil {
        return nil, fmt.Errorf("failed to query status data: %w", err)
    }

    serviceDataMap := make(map[uint64]map[int64]*rawDataPoint)

    for serviceID, points := range delayData {
        if serviceDataMap[serviceID] == nil {
            serviceDataMap[serviceID] = make(map[int64]*rawDataPoint)
        }
        for _, p := range points {
            serviceDataMap[serviceID][p.timestamp] = &rawDataPoint{
                timestamp: p.timestamp,
                value:     p.value,
                hasDelay:  true,
            }
        }
    }

    for serviceID, points := range statusData {
        if serviceDataMap[serviceID] == nil {
            serviceDataMap[serviceID] = make(map[int64]*rawDataPoint)
        }
        for _, p := range points {
            if existing, ok := serviceDataMap[serviceID][p.timestamp]; ok {
                existing.status = p.value
                existing.hasStatus = true
            } else {
                serviceDataMap[serviceID][p.timestamp] = &rawDataPoint{
                    timestamp: p.timestamp,
                    status:    p.value,
                    hasStatus: true,
                }
            }
        }
    }

    results := make(map[uint64]*ServiceHistoryResult)

    for serviceID, pointsMap := range serviceDataMap {
        points := make([]rawDataPoint, 0, len(pointsMap))
        for _, p := range pointsMap {
            points = append(points, *p)
        }
        stats := calculateStats(points, period.DownsampleInterval())
        results[serviceID] = &ServiceHistoryResult{
            ServiceID: serviceID,
            Servers: []ServerServiceStats{{
                ServerID: serverID,
                Stats:    stats,
            }},
        }
    }

    return results, nil
}
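// Usage sketch (illustrative, not from the diff): the batch variant above,
// added per the commit notes for query optimization, returns every service's
// history for one server from a single scan per metric rather than one
// QueryServiceHistory call per service.
func exampleQueryByServer(db *TSDB, serverID uint64) {
    results, err := db.QueryServiceHistoryByServerID(serverID, Period30Days)
    if err != nil {
        log.Printf("query failed: %v", err)
        return
    }
    for serviceID, res := range results {
        log.Printf("service %d: %d server entries", serviceID, len(res.Servers))
    }
}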
func (db *TSDB) queryMetricByServerID(metric MetricType, serverID string, tr storage.TimeRange) (map[uint64][]metricPoint, error) {
    tfs := storage.NewTagFilters()
    if err := tfs.Add(nil, []byte(metric), false, false); err != nil {
        return nil, err
    }
    if err := tfs.Add([]byte("server_id"), []byte(serverID), false, false); err != nil {
        return nil, err
    }

    deadline := uint64(time.Now().Add(30 * time.Second).Unix())

    var search storage.Search
    search.Init(nil, db.storage, []*storage.TagFilters{tfs}, tr, 100000, deadline)
    defer search.MustClose()

    result := make(map[uint64][]metricPoint)
    var timestamps []int64
    var values []float64

    for search.NextMetricBlock() {
        mbr := search.MetricBlockRef
        var block storage.Block
        mbr.BlockRef.MustReadBlock(&block)

        mn := storage.GetMetricName()
        if err := mn.Unmarshal(mbr.MetricName); err != nil {
            log.Printf("NEZHA>> TSDB: failed to unmarshal metric name: %v", err)
            storage.PutMetricName(mn)
            continue
        }

        serviceIDBytes := mn.GetTagValue("service_id")
        if len(serviceIDBytes) == 0 {
            storage.PutMetricName(mn)
            continue
        }

        serviceID, err := strconv.ParseUint(string(serviceIDBytes), 10, 64)
        if err != nil {
            log.Printf("NEZHA>> TSDB: failed to parse service_id %q: %v", string(serviceIDBytes), err)
            storage.PutMetricName(mn)
            continue
        }
        storage.PutMetricName(mn)

        if err := block.UnmarshalData(); err != nil {
            log.Printf("NEZHA>> TSDB: failed to unmarshal block data: %v", err)
            continue
        }

        timestamps = timestamps[:0]
        values = values[:0]
        timestamps, values = block.AppendRowsWithTimeRangeFilter(timestamps, values, tr)

        for i := range timestamps {
            result[serviceID] = append(result[serviceID], metricPoint{
                timestamp: timestamps[i],
                value:     values[i],
            })
        }
    }

    if err := search.Error(); err != nil {
        return nil, err
    }

    return result, nil
}