[0.9.19] 进程数/连接数/负载 监控报警

This commit is contained in:
naiba
2021-08-15 16:38:05 +08:00
parent 68624b15d6
commit c9a9441f3e
9 changed files with 189 additions and 60 deletions

View File

@@ -21,8 +21,8 @@ type Rule struct {
// net_all_speed、transfer_in、transfer_out、transfer_all、offline
// transfer_in_cycle、transfer_out_cycle、transfer_all_cycle
Type string `json:"type,omitempty"`
Min uint64 `json:"min,omitempty"` // 最小阈值 (百分比、字节 kb ÷ 1024)
Max uint64 `json:"max,omitempty"` // 最大阈值 (百分比、字节 kb ÷ 1024)
Min float64 `json:"min,omitempty"` // 最小阈值 (百分比、字节 kb ÷ 1024)
Max float64 `json:"max,omitempty"` // 最大阈值 (百分比、字节 kb ÷ 1024)
CycleStart time.Time `json:"cycle_start,omitempty"` // 流量统计的开始时间
CycleInterval uint64 `json:"cycle_interval,omitempty"` // 流量统计周期
Duration uint64 `json:"duration,omitempty"` // 持续时间 (秒)
@@ -34,11 +34,11 @@ type Rule struct {
LastCycleStatus map[uint64]interface{} `json:"-"`
}
// percentage returns used as a percentage of total, as a float64.
//
// It returns 0 when total is 0, so callers never see a division by zero.
// Both operands are converted to float64 before multiplying: this keeps the
// fractional part of the result and avoids the uint64 wrap-around that
// `used * 100` would hit for very large values of used.
func percentage(used, total uint64) float64 {
	if total == 0 {
		return 0
	}
	return float64(used) * 100 / float64(total)
}
// Snapshot 未通过规则返回 struct{}{}, 通过返回 nil
@@ -57,11 +57,11 @@ func (u *Rule) Snapshot(server *Server, db *gorm.DB) interface{} {
return u.LastCycleStatus[server.ID]
}
var src uint64
var src float64
switch u.Type {
case "cpu":
src = uint64(server.State.CPU)
src = float64(server.State.CPU)
case "memory":
src = percentage(server.State.MemUsed, server.Host.MemTotal)
case "swap":
@@ -69,44 +69,56 @@ func (u *Rule) Snapshot(server *Server, db *gorm.DB) interface{} {
case "disk":
src = percentage(server.State.DiskUsed, server.Host.DiskTotal)
case "net_in_speed":
src = server.State.NetInSpeed
src = float64(server.State.NetInSpeed)
case "net_out_speed":
src = server.State.NetOutSpeed
src = float64(server.State.NetOutSpeed)
case "net_all_speed":
src = server.State.NetOutSpeed + server.State.NetOutSpeed
src = float64(server.State.NetOutSpeed + server.State.NetOutSpeed)
case "transfer_in":
src = server.State.NetInTransfer
src = float64(server.State.NetInTransfer)
case "transfer_out":
src = server.State.NetOutTransfer
src = float64(server.State.NetOutTransfer)
case "transfer_all":
src = server.State.NetOutTransfer + server.State.NetInTransfer
src = float64(server.State.NetOutTransfer + server.State.NetInTransfer)
case "offline":
if server.LastActive.IsZero() {
src = 0
} else {
src = uint64(server.LastActive.Unix())
src = float64(server.LastActive.Unix())
}
case "transfer_in_cycle":
src = server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn)
src = float64(server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn))
if u.CycleInterval != 1 {
var res NResult
db.Model(&Transfer{}).Select("SUM(`in`) AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res)
src += res.N
src += float64(res.N)
}
case "transfer_out_cycle":
src = server.State.NetOutTransfer - uint64(server.PrevHourlyTransferOut)
src = float64(server.State.NetOutTransfer - uint64(server.PrevHourlyTransferOut))
if u.CycleInterval != 1 {
var res NResult
db.Model(&Transfer{}).Select("SUM(`out`) AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res)
src += res.N
src += float64(res.N)
}
case "transfer_all_cycle":
src = server.State.NetOutTransfer - uint64(server.PrevHourlyTransferOut) + server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn)
src = float64(server.State.NetOutTransfer - uint64(server.PrevHourlyTransferOut) + server.State.NetInTransfer - uint64(server.PrevHourlyTransferIn))
if u.CycleInterval != 1 {
var res NResult
db.Model(&Transfer{}).Select("SUM(`in`+`out`) AS n").Where("created_at > ? AND server_id = ?", u.GetTransferDurationStart(), server.ID).Scan(&res)
src += res.N
src += float64(res.N)
}
case "load1":
src = server.State.Load1
case "load5":
src = server.State.Load5
case "load15":
src = server.State.Load15
case "tcp_conn_count":
src = float64(server.State.TcpConnCount)
case "udp_conn_count":
src = float64(server.State.UdpConnCount)
case "process_count":
src = float64(server.State.ProcessCount)
}
// 循环区间流量检测 · 更新下次需要检测时间
@@ -129,7 +141,7 @@ func (u *Rule) Snapshot(server *Server, db *gorm.DB) interface{} {
}
}
if u.Type == "offline" && uint64(time.Now().Unix())-src > 6 {
if u.Type == "offline" && float64(time.Now().Unix())-src > 6 {
return struct{}{}
} else if (u.Max > 0 && src > u.Max) || (u.Min > 0 && src < u.Min) {
return struct{}{}