任意时间区间(月流量)统计,无视重启~

This commit is contained in:
naiba
2021-07-14 23:53:37 +08:00
parent dff1e29c40
commit 63bb1570d2
16 changed files with 290 additions and 77 deletions

View File

@@ -27,10 +27,10 @@ func (r *AlertRule) AfterFind(tx *gorm.DB) error {
return json.Unmarshal([]byte(r.RulesRaw), &r.Rules)
}
func (r *AlertRule) Snapshot(server *Server) []interface{} {
func (r *AlertRule) Snapshot(server *Server, db *gorm.DB) []interface{} {
var point []interface{}
for i := 0; i < len(r.Rules); i++ {
point = append(point, r.Rules[i].Snapshot(server))
point = append(point, r.Rules[i].Snapshot(server, db))
}
return point
}
@@ -39,28 +39,39 @@ func (r *AlertRule) Check(points [][]interface{}) (int, bool) {
var max int
var count int
for i := 0; i < len(r.Rules); i++ {
total := 0.0
fail := 0.0
num := int(r.Rules[i].Duration)
if num > max {
max = num
}
if len(points) < num {
continue
}
for j := len(points) - 1; j >= 0 && len(points)-num <= j; j-- {
total++
if points[j][i] != nil {
fail++
if r.Rules[i].IsTransferDurationRule() {
if max < 1 {
max = 1
}
// 循环区间流量报警
for j := len(points[i]) - 1; j >= 0; j-- {
if points[i][j] != nil {
count++
break
}
}
} else {
// 常规报警
total := 0.0
fail := 0.0
num := int(r.Rules[i].Duration)
if num > max {
max = num
}
if len(points) < num {
continue
}
for j := len(points) - 1; j >= 0 && len(points)-num <= j; j-- {
total++
if points[j][i] != nil {
fail++
}
}
if fail/total > 0.7 {
count++
break
}
}
if fail/total > 0.7 {
count++
break
}
}
if count == len(r.Rules) {
return max, false
}
return max, true
return max, count != len(r.Rules)
}

View File

@@ -8,8 +8,8 @@ const CacheKeyOauth2State = "p:a:state"
const CacheKeyServicePage = "p:c:service"
// Common holds the primary key and the created/updated/deleted timestamps;
// model structs such as Transfer embed it.
// NOTE(review): gorm.io/gorm reads field options from the `gorm` tag — confirm
// that the `sql:"index"` tags below still result in indexes being created.
type Common struct {
ID uint64 `gorm:"primary_key"`
CreatedAt time.Time
ID uint64 `gorm:"primary_key"`
CreatedAt time.Time `sql:"index"`
UpdatedAt time.Time
DeletedAt *time.Time `sql:"index"`
}

View File

@@ -1,21 +1,36 @@
package model
import "time"
import (
"strings"
"time"
"gorm.io/gorm"
)
// Values for Rule.Cover.
const (
// RuleCoverAll applies a rule to every server except those listed in
// Rule.Ignore.
RuleCoverAll = iota
// RuleCoverIgnoreAll is the other coverage mode — presumably the rule then
// applies only to servers listed in Rule.Ignore; confirm in Rule.Snapshot.
RuleCoverIgnoreAll
)
// NResult is the scan target for aggregate queries that alias their single
// result column as n (for example the SUM(...) AS n queries in Rule.Snapshot).
type NResult struct {
N uint64
}
// Rule is a single alert condition: a metric type, its thresholds, how long
// the condition must hold, and which servers it covers.
type Rule struct {
// Metric type: cpu, memory, swap, disk, net_in_speed, net_out_speed,
// net_all_speed, transfer_in, transfer_out, transfer_all, offline
Type string `json:"type,omitempty"`
Min uint64 `json:"min,omitempty"` // minimum threshold (percentage, bytes kb ÷ 1024)
Max uint64 `json:"max,omitempty"` // maximum threshold (percentage, bytes kb ÷ 1024)
Duration uint64 `json:"duration,omitempty"` // duration (seconds)
Cover uint64 `json:"cover,omitempty"` // coverage scope: RuleCoverAll/IgnoreAll
Ignore map[uint64]bool `json:"ignore,omitempty"` // exclusions from the coverage scope
// transfer_in_cycle, transfer_out_cycle, transfer_all_cycle
Type string `json:"type,omitempty"`
Min uint64 `json:"min,omitempty"` // minimum threshold (percentage, bytes kb ÷ 1024)
Max uint64 `json:"max,omitempty"` // maximum threshold (percentage, bytes kb ÷ 1024)
CycleStart time.Time `json:"cycle_start,omitempty"` // start time of the traffic statistics
CycleInterval uint64 `json:"cycle_interval,omitempty"` // traffic statistics cycle length (GetTransferDurationStart treats it as hours)
Duration uint64 `json:"duration,omitempty"` // duration (seconds)
Cover uint64 `json:"cover,omitempty"` // coverage scope: RuleCoverAll/IgnoreAll
Ignore map[uint64]bool `json:"ignore,omitempty"` // exclusions from the coverage scope
// Used only as a cache: records, per server ID, the next time the cyclic
// traffic check should run. Never serialized.
NextTransferAt map[uint64]time.Time `json:"-"`
}
func percentage(used, total uint64) uint64 {
@@ -26,7 +41,7 @@ func percentage(used, total uint64) uint64 {
}
// Snapshot 未通过规则返回 struct{}{}, 通过返回 nil
func (u *Rule) Snapshot(server *Server) interface{} {
func (u *Rule) Snapshot(server *Server, db *gorm.DB) interface{} {
// 监控全部但是排除了此服务器
if u.Cover == RuleCoverAll && u.Ignore[server.ID] {
return nil
@@ -36,6 +51,11 @@ func (u *Rule) Snapshot(server *Server) interface{} {
return nil
}
// 循环区间流量检测 · 短期无需重复检测
if u.IsTransferDurationRule() && u.NextTransferAt[server.ID].After(time.Now()) {
return nil
}
var src uint64
switch u.Type {
@@ -65,6 +85,39 @@ func (u *Rule) Snapshot(server *Server) interface{} {
} else {
src = uint64(server.LastActive.Unix())
}
case "transfer_in_cycle":
src = server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn)
if u.CycleInterval != 1 {
var res NResult
db.Model(&Transfer{}).Select("SUM('in') AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res)
src += res.N
}
case "transfer_out_cycle":
src = server.State.NetOutTransfer - uint64(server.PrevHourlyTransferOut)
if u.CycleInterval != 1 {
var res NResult
db.Model(&Transfer{}).Select("SUM('in') AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res)
src += res.N
}
case "transfer_all_cycle":
src = server.State.NetOutTransfer - uint64(server.PrevHourlyTransferOut) + server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn)
if u.CycleInterval != 1 {
var res NResult
db.Model(&Transfer{}).Select("SUM('in'+'out') AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res)
src += res.N
}
}
// 循环区间流量检测 · 更新下次需要检测时间
if u.IsTransferDurationRule() {
seconds := 1800 * time.Duration(((u.Max - src) / u.Max))
if seconds < 180 {
seconds = 180
}
if u.NextTransferAt == nil {
u.NextTransferAt = make(map[uint64]time.Time)
}
u.NextTransferAt[server.ID] = time.Now().Add(time.Duration(time.Second * seconds))
}
if u.Type == "offline" && uint64(time.Now().Unix())-src > 6 {
@@ -75,3 +128,12 @@ func (u *Rule) Snapshot(server *Server) interface{} {
return nil
}
// IsTransferDurationRule reports whether the rule is a cyclic traffic rule,
// i.e. whether its Type ends with "_cycle".
func (rule Rule) IsTransferDurationRule() bool {
	const cycleSuffix = "_cycle"
	return strings.HasSuffix(rule.Type, cycleSuffix)
}
// GetTransferDurationStart returns the start of the current statistics cycle:
// CycleStart advanced by as many whole cycles as have elapsed up to now.
// CycleInterval is interpreted as a number of hours. The result has second
// precision.
//
// If CycleInterval is zero (for example, omitted from the rule's JSON) or so
// large that the conversion to seconds overflows, CycleStart itself is
// returned instead of dividing by zero.
func (rule Rule) GetTransferDurationStart() time.Time {
	start := rule.CycleStart.Unix()
	interval := 3600 * int64(rule.CycleInterval)
	if interval <= 0 {
		// A zero interval would make the integer division below panic.
		return time.Unix(start, 0)
	}
	// Integer division drops the partially elapsed cycle.
	elapsed := time.Now().Unix() - start
	return time.Unix(start+elapsed/interval*interval, 0)
}

View File

@@ -23,6 +23,9 @@ type Server struct {
TaskClose chan error `gorm:"-" json:"-"`
TaskStream pb.NezhaService_RequestTaskServer `gorm:"-" json:"-"`
PrevHourlyTransferIn int64 `gorm:"-" json:"-"` // 上次数据点时的入站使用量
PrevHourlyTransferOut int64 `gorm:"-" json:"-"` // 上次数据点时的出站使用量
}
func (s Server) Marshal() template.JS {

8
model/transfer.go Normal file
View File

@@ -0,0 +1,8 @@
package model
// Transfer is one stored traffic record for a server. Rule.Snapshot sums these
// rows, filtered by created_at and server_id, to evaluate the cyclic traffic
// rules.
type Transfer struct {
Common
ServerID uint64 // ID of the server the record belongs to
In uint64 // inbound traffic for the record — presumably bytes; TODO confirm
Out uint64 // outbound traffic for the record — presumably bytes; TODO confirm
}