优化忽略规则配置和 Agent 获取 IP

This commit is contained in:
naiba
2021-06-21 21:30:42 +08:00
parent c4f36d17d5
commit 4b0c0ad288
20 changed files with 370 additions and 291 deletions

View File

@@ -10,10 +10,17 @@ import (
"github.com/naiba/nezha/pkg/utils"
)
// Outcomes of an alert rule check, as stored per rule and per server in
// alertsPrevState. The values come from iota, so _RuleCheckNoData is 0, which
// is also what a lookup of a missing alertsPrevState entry yields.
const (
_RuleCheckNoData = iota // no check outcome recorded yet
_RuleCheckFail // the most recent check did not pass
_RuleCheckPass // the most recent check passed
)
// Alert rules and their per-server bookkeeping.
// alertsLock is taken for writing by the handlers in this file before they
// modify alerts, alertsStore or alertsPrevState.
var alertsLock sync.RWMutex
var alerts []model.AlertRule
var alerts []*model.AlertRule
// alertsStore is indexed by alert rule ID, then by server ID; each entry is the
// list of snapshots appended from AlertRule.Snapshot for that rule/server pair.
var alertsStore map[uint64]map[uint64][][]interface{}
// alertsPrevState is indexed by alert rule ID, then by server ID, and holds the
// last recorded _RuleCheck* outcome for that pair.
var alertsPrevState map[uint64]map[uint64]uint
type NotificationHistory struct {
Duration time.Duration
@@ -22,6 +29,7 @@ type NotificationHistory struct {
func AlertSentinelStart() {
alertsStore = make(map[uint64]map[uint64][][]interface{})
alertsPrevState = make(map[uint64]map[uint64]uint)
notificationsLock.Lock()
if err := DB.Find(&notifications).Error; err != nil {
panic(err)
@@ -33,6 +41,7 @@ func AlertSentinelStart() {
}
for i := 0; i < len(alerts); i++ {
alertsStore[alerts[i].ID] = make(map[uint64][][]interface{})
alertsPrevState[alerts[i].ID] = make(map[uint64]uint)
}
alertsLock.Unlock()
@@ -58,23 +67,26 @@ func OnRefreshOrAddAlert(alert model.AlertRule) {
alertsLock.Lock()
defer alertsLock.Unlock()
delete(alertsStore, alert.ID)
delete(alertsPrevState, alert.ID)
var isEdit bool
for i := 0; i < len(alerts); i++ {
if alerts[i].ID == alert.ID {
alerts[i] = alert
alerts[i] = &alert
isEdit = true
}
}
if !isEdit {
alerts = append(alerts, alert)
alerts = append(alerts, &alert)
}
alertsStore[alert.ID] = make(map[uint64][][]interface{})
alertsPrevState[alert.ID] = make(map[uint64]uint)
}
func OnDeleteAlert(id uint64) {
alertsLock.Lock()
defer alertsLock.Unlock()
delete(alertsStore, id)
delete(alertsPrevState, id)
for i := 0; i < len(alerts); i++ {
if alerts[i].ID == id {
alerts = append(alerts[:i], alerts[i+1:]...)
@@ -98,11 +110,18 @@ func checkStatus() {
// 监测点
alertsStore[alert.ID][server.ID] = append(alertsStore[alert.
ID][server.ID], alert.Snapshot(server))
// 发送通知
max, desc := alert.Check(alertsStore[alert.ID][server.ID])
if desc != "" {
// 发送通知,分为触发报警和恢复通知
max, passed := alert.Check(alertsStore[alert.ID][server.ID])
if !passed {
alertsPrevState[alert.ID][server.ID] = _RuleCheckFail
message := fmt.Sprintf("报警规则:%s服务器%s(%s),逮到咯,快去看看!", alert.Name, server.Name, utils.IPDesensitize(server.Host.IP))
go SendNotification(message, true)
} else {
if alertsPrevState[alert.ID][server.ID] == _RuleCheckFail {
message := fmt.Sprintf("报警规则:%s服务器%s(%s),已恢复正常", alert.Name, server.Name, utils.IPDesensitize(server.Host.IP))
go SendNotification(message, true)
}
alertsPrevState[alert.ID][server.ID] = _RuleCheckPass
}
// 清理旧数据
if max > 0 && max < len(alertsStore[alert.ID][server.ID]) {

View File

@@ -13,7 +13,7 @@ import (
pb "github.com/naiba/nezha/proto"
)
var Version = "v0.7.4" // !!记得修改 README 中的 badge 版本!!
var Version = "v0.8.0" // !!记得修改 README 中的 badge 版本!!
const (
SnapshotDelay = 3
@@ -58,7 +58,7 @@ var CronLock sync.RWMutex
var Crons map[uint64]*model.Cron
var Cron *cron.Cron
func CronTrigger(c *model.Cron) {
func ManualTrigger(c *model.Cron) {
ServerLock.RLock()
defer ServerLock.RUnlock()
for j := 0; j < len(c.Servers); j++ {
@@ -73,3 +73,31 @@ func CronTrigger(c *model.Cron) {
}
}
}
// CronTrigger builds the job function for the scheduled task cr.
//
// The set of server IDs listed in cr.Servers is computed once, when
// CronTrigger is called. Each time the returned function runs it walks
// ServerList under ServerLock's read lock and decides per server:
//   - cr.Cover == model.CronCoverAll: servers listed in cr.Servers are skipped.
//   - cr.Cover == model.CronCoverIgnoreAll: servers not listed in cr.Servers
//     are skipped.
//
// Every remaining server with a task stream is sent a command task carrying
// cr.ID and cr.Command. A server without a task stream triggers a
// notification (sent with muteable=false) saying the task could not run.
func CronTrigger(cr model.Cron) func() {
	// Membership set of the server IDs named by the cron entry.
	listed := make(map[uint64]bool, len(cr.Servers))
	for _, id := range cr.Servers {
		listed[id] = true
	}

	return func() {
		ServerLock.RLock()
		defer ServerLock.RUnlock()

		for _, s := range ServerList {
			inList := listed[s.ID]
			switch {
			case cr.Cover == model.CronCoverAll && inList:
				// Cover-all mode: listed servers are the exclusions.
				continue
			case cr.Cover == model.CronCoverIgnoreAll && !inList:
				// Ignore-all mode: only listed servers are targeted.
				continue
			}

			if s.TaskStream == nil {
				// No stream to deliver the task on; report it instead.
				SendNotification(fmt.Sprintf("计划任务:%s服务器%s 离线,无法执行。", cr.Name, s.Name), false)
				continue
			}

			s.TaskStream.Send(&pb.Task{
				Id:   cr.ID,
				Data: cr.Command,
				Type: model.TaskTypeCommand,
			})
		}
	}
}

View File

@@ -3,6 +3,7 @@ package dao
import (
"crypto/md5"
"encoding/hex"
"log"
"sync"
"time"
@@ -69,6 +70,9 @@ func SendNotification(desc string, muteable bool) {
}
if !flag {
if Conf.Debug {
log.Println("muted notification", desc, muteable)
}
return
}
}