implement notification group (#450)

* implement notification group

* some fixes

* fix sql

* add listNotification

* retrieve notification from map

* create notification_group_notification if it does not exist

* NotificationIDToGroup -> NotificationIDToGroups

* clean
This commit is contained in:
UUBulb
2024-10-23 21:55:12 +08:00
committed by GitHub
parent e792215f6e
commit 61e755d2b9
18 changed files with 666 additions and 180 deletions

View File

@@ -1,6 +1,7 @@
package singleton
import (
"fmt"
"log"
"sync"
"time"
@@ -66,11 +67,6 @@ func AlertSentinelStart() {
panic(err)
}
for _, alert := range Alerts {
// 旧版本可能不存在通知组 为其添加默认值
if alert.NotificationTag == "" {
alert.NotificationTag = "default"
DB.Save(alert)
}
alertsStore[alert.ID] = make(map[uint64][][]interface{})
alertsPrevState[alert.ID] = make(map[uint64]uint)
addCycleTransferStatsInfo(alert)
@@ -157,24 +153,22 @@ func checkStatus() {
// 始终触发模式或上次检查不为失败时触发报警(跳过单次触发+上次失败的情况)
if alert.TriggerMode == model.ModeAlwaysTrigger || alertsPrevState[alert.ID][server.ID] != _RuleCheckFail {
alertsPrevState[alert.ID][server.ID] = _RuleCheckFail
// message := fmt.Sprintf("[%s] %s(%s) %s", Localizer.MustLocalize(&i18n.LocalizeConfig{
// MessageID: "Incident",
// }), server.Name, IPDesensitize(server.Host.IP), alert.Name)
message := fmt.Sprintf("[%s] %s(%s) %s", "Incident",
server.Name, IPDesensitize(server.Host.IP), alert.Name)
go SendTriggerTasks(alert.FailTriggerTasks, curServer.ID)
// go SendNotification(alert.NotificationTag, message, NotificationMuteLabel.ServerIncident(server.ID, alert.ID), &curServer)
go SendNotification(alert.NotificationGroupID, message, NotificationMuteLabel.ServerIncident(server.ID, alert.ID), &curServer)
// 清除恢复通知的静音缓存
UnMuteNotification(alert.NotificationTag, NotificationMuteLabel.ServerIncidentResolved(server.ID, alert.ID))
UnMuteNotification(alert.NotificationGroupID, NotificationMuteLabel.ServerIncidentResolved(server.ID, alert.ID))
}
} else {
// 本次通过检查但上一次的状态为失败,则发送恢复通知
if alertsPrevState[alert.ID][server.ID] == _RuleCheckFail {
// message := fmt.Sprintf("[%s] %s(%s) %s", Localizer.MustLocalize(&i18n.LocalizeConfig{
// MessageID: "Resolved",
// }), server.Name, IPDesensitize(server.Host.IP), alert.Name)
message := fmt.Sprintf("[%s] %s(%s) %s", "Resolved",
server.Name, IPDesensitize(server.Host.IP), alert.Name)
go SendTriggerTasks(alert.RecoverTriggerTasks, curServer.ID)
// go SendNotification(alert.NotificationTag, message, NotificationMuteLabel.ServerIncidentResolved(server.ID, alert.ID), &curServer)
go SendNotification(alert.NotificationGroupID, message, NotificationMuteLabel.ServerIncidentResolved(server.ID, alert.ID), &curServer)
// 清除失败通知的静音缓存
UnMuteNotification(alert.NotificationTag, NotificationMuteLabel.ServerIncident(server.ID, alert.ID))
UnMuteNotification(alert.NotificationGroupID, NotificationMuteLabel.ServerIncident(server.ID, alert.ID))
}
alertsPrevState[alert.ID][server.ID] = _RuleCheckPass
}

View File

@@ -30,37 +30,32 @@ func loadCronTasks() {
var crons []model.Cron
DB.Find(&crons)
var err error
var notificationTagList []string
notificationMsgMap := make(map[string]*bytes.Buffer)
var notificationGroupList []uint64
notificationMsgMap := make(map[uint64]*bytes.Buffer)
for i := 0; i < len(crons); i++ {
// 触发任务类型无需注册
if crons[i].TaskType == model.CronTypeTriggerTask {
Crons[crons[i].ID] = &crons[i]
continue
}
// 旧版本计划任务可能不存在通知组 为其添加默认通知组
if crons[i].NotificationTag == "" {
crons[i].NotificationTag = "default"
DB.Save(crons[i])
}
// 注册计划任务
crons[i].CronJobID, err = Cron.AddFunc(crons[i].Scheduler, CronTrigger(crons[i]))
if err == nil {
Crons[crons[i].ID] = &crons[i]
} else {
// 当前通知组首次出现 将其加入通知组列表并初始化通知组消息缓存
if _, ok := notificationMsgMap[crons[i].NotificationTag]; !ok {
notificationTagList = append(notificationTagList, crons[i].NotificationTag)
notificationMsgMap[crons[i].NotificationTag] = bytes.NewBufferString("")
notificationMsgMap[crons[i].NotificationTag].WriteString("调度失败的计划任务:[")
if _, ok := notificationMsgMap[crons[i].NotificationGroupID]; !ok {
notificationGroupList = append(notificationGroupList, crons[i].NotificationGroupID)
notificationMsgMap[crons[i].NotificationGroupID] = bytes.NewBufferString("")
notificationMsgMap[crons[i].NotificationGroupID].WriteString("调度失败的计划任务:[")
}
notificationMsgMap[crons[i].NotificationTag].WriteString(fmt.Sprintf("%d,", crons[i].ID))
notificationMsgMap[crons[i].NotificationGroupID].WriteString(fmt.Sprintf("%d,", crons[i].ID))
}
}
// 向注册错误的计划任务所在通知组发送通知
for _, tag := range notificationTagList {
notificationMsgMap[tag].WriteString("] 这些任务将无法正常执行,请进入后点重新修改保存。")
SendNotification(tag, notificationMsgMap[tag].String(), nil)
for _, gid := range notificationGroupList {
notificationMsgMap[gid].WriteString("] 这些任务将无法正常执行,请进入后点重新修改保存。")
SendNotification(gid, notificationMsgMap[gid].String(), nil)
}
Cron.Start()
}
@@ -108,7 +103,7 @@ func CronTrigger(cr model.Cron, triggerServer ...uint64) func() {
// 保存当前服务器状态信息
curServer := model.Server{}
copier.Copy(&curServer, s)
SendNotification(cr.NotificationTag, fmt.Sprintf("[任务失败] %s服务器 %s 离线,无法执行。", cr.Name, s.Name), nil, &curServer)
SendNotification(cr.NotificationGroupID, fmt.Sprintf("[任务失败] %s服务器 %s 离线,无法执行。", cr.Name, s.Name), nil, &curServer)
}
}
return
@@ -133,7 +128,7 @@ func CronTrigger(cr model.Cron, triggerServer ...uint64) func() {
// 保存当前服务器状态信息
curServer := model.Server{}
copier.Copy(&curServer, s)
SendNotification(cr.NotificationTag, fmt.Sprintf("[任务失败] %s服务器 %s 离线,无法执行。", cr.Name, s.Name), nil, &curServer)
SendNotification(cr.NotificationGroupID, fmt.Sprintf("[任务失败] %s服务器 %s 离线,无法执行。", cr.Name, s.Name), nil, &curServer)
}
}
}

View File

@@ -9,55 +9,152 @@ import (
"github.com/naiba/nezha/model"
)
const firstNotificationDelay = time.Minute * 15
const (
firstNotificationDelay = time.Minute * 15
)
// Notification methods: in-memory indexes of notifications and notification groups.
var (
NotificationList map[string]map[uint64]*model.Notification // [NotificationMethodTag][NotificationID] -> model.Notification
NotificationIDToTag map[uint64]string // [NotificationID] -> NotificationTag
notificationsLock sync.RWMutex
NotificationList map[uint64]map[uint64]*model.Notification // [NotificationGroupID][NotificationID] -> model.Notification
NotificationIDToGroups map[uint64]map[uint64]struct{} // [NotificationID] -> set of NotificationGroupID
NotificationMap map[uint64]*model.Notification // [NotificationID] -> model.Notification
NotificationGroup map[uint64]string // [NotificationGroupID] -> [NotificationGroupName]
NotificationsLock sync.RWMutex // taken by the functions that read or modify the notification maps
NotificationGroupLock sync.RWMutex // taken when NotificationGroup is read to build a mute label
)
// InitNotification initializes the Tag <-> ID <-> Notification mappings.
// InitNotification initializes the GroupID <-> ID <-> Notification mappings.
// It replaces the package-level maps with fresh, empty ones.
func InitNotification() {
NotificationList = make(map[string]map[uint64]*model.Notification)
NotificationIDToTag = make(map[uint64]string)
NotificationList = make(map[uint64]map[uint64]*model.Notification)
NotificationIDToGroups = make(map[uint64]map[uint64]struct{})
NotificationGroup = make(map[uint64]string)
}
// loadNotifications initializes the notification-related state from the DB.
// It panics if either database query fails.
func loadNotifications() {
InitNotification()
notificationsLock.Lock()
defer notificationsLock.Unlock()
NotificationsLock.Lock()
defer NotificationsLock.Unlock()
// Collect, per notification group, the IDs of its member notifications.
groupNotifications := make(map[uint64][]uint64)
var ngn []model.NotificationGroupNotification
if err := DB.Find(&ngn).Error; err != nil {
panic(err)
}
for _, n := range ngn {
groupNotifications[n.NotificationGroupID] = append(groupNotifications[n.NotificationGroupID], n.NotificationID)
}
var notifications []model.Notification
if err := DB.Find(&notifications).Error; err != nil {
panic(err)
}
for i := 0; i < len(notifications); i++ {
// Older versions may have no Tag; set it to the default value automatically.
if notifications[i].Tag == "" {
SetDefaultNotificationTagInDB(&notifications[i])
// Index every notification by ID; the pointers refer to the slice elements.
NotificationMap = make(map[uint64]*model.Notification, len(notifications))
for i := range notifications {
NotificationMap[notifications[i].ID] = &notifications[i]
}
// Build the group -> notifications index and the reverse notification -> groups index.
for gid, nids := range groupNotifications {
NotificationList[gid] = make(map[uint64]*model.Notification)
for _, nid := range nids {
// Member IDs without a matching notification are ignored.
if n, ok := NotificationMap[nid]; ok {
NotificationList[gid][n.ID] = n
if NotificationIDToGroups[n.ID] == nil {
NotificationIDToGroups[n.ID] = make(map[uint64]struct{})
}
NotificationIDToGroups[n.ID][gid] = struct{}{}
}
}
AddNotificationToList(&notifications[i])
}
}
// SetDefaultNotificationTagInDB sets the Tag of a notification method to the default value.
func SetDefaultNotificationTagInDB(n *model.Notification) {
n.Tag = "default"
if err := DB.Save(n).Error; err != nil {
log.Println("NEZHA>> SetDefaultNotificationTagInDB 错误: ", err)
// OnRefreshOrAddNotificationGroup refreshes the in-memory state of a notification group.
// ngn holds the IDs of the notifications that belong to the group.
func OnRefreshOrAddNotificationGroup(ng *model.NotificationGroup, ngn []uint64) {
NotificationsLock.Lock()
defer NotificationsLock.Unlock()
NotificationGroupLock.Lock()
defer NotificationGroupLock.Unlock()
// A group already present in NotificationGroup is treated as an edit, otherwise as an add.
var isEdit bool
if _, ok := NotificationGroup[ng.ID]; ok {
isEdit = true
}
if !isEdit {
AddNotificationGroupToList(ng, ngn)
} else {
UpdateNotificationGroupInList(ng, ngn)
}
}
// AddNotificationGroupToList registers a notification group in the in-memory
// maps: it records the group's name, builds the group's notification set and
// links every member notification back to the group.
//
// ngn holds the IDs of the notifications that belong to the group. IDs that
// have no entry in NotificationMap are skipped: storing a nil
// *model.Notification in NotificationList would make SendNotification panic
// when it dereferences the entry.
//
// The function takes no locks itself; OnRefreshOrAddNotificationGroup calls it
// with NotificationsLock and NotificationGroupLock held.
func AddNotificationGroupToList(ng *model.NotificationGroup, ngn []uint64) {
	NotificationGroup[ng.ID] = ng.Name

	members := make(map[uint64]*model.Notification, len(ngn))
	for _, nid := range ngn {
		n := NotificationMap[nid]
		if n == nil {
			// Unknown notification ID: nothing to bind.
			continue
		}
		members[nid] = n
		if NotificationIDToGroups[nid] == nil {
			NotificationIDToGroups[nid] = make(map[uint64]struct{})
		}
		NotificationIDToGroups[nid][ng.ID] = struct{}{}
	}
	NotificationList[ng.ID] = members
}
// UpdateNotificationGroupInList updates an existing notification group in the
// in-memory maps: it refreshes the group's name, replaces its notification
// set with the notifications listed in ngn, and removes the group from the
// reverse index of every notification that is no longer a member.
//
// IDs in ngn that have no entry in NotificationMap are skipped: storing a nil
// *model.Notification in NotificationList would make SendNotification panic
// when it dereferences the entry.
//
// The function takes no locks itself; OnRefreshOrAddNotificationGroup calls it
// with NotificationsLock and NotificationGroupLock held.
func UpdateNotificationGroupInList(ng *model.NotificationGroup, ngn []uint64) {
	NotificationGroup[ng.ID] = ng.Name

	// The old member map is replaced, not mutated, so it can be kept as-is
	// for the stale-member pass below.
	previous := NotificationList[ng.ID]

	current := make(map[uint64]*model.Notification, len(ngn))
	for _, nid := range ngn {
		n := NotificationMap[nid]
		if n == nil {
			// Unknown notification ID: nothing to bind.
			continue
		}
		current[nid] = n
		if NotificationIDToGroups[nid] == nil {
			NotificationIDToGroups[nid] = make(map[uint64]struct{})
		}
		NotificationIDToGroups[nid][ng.ID] = struct{}{}
	}
	NotificationList[ng.ID] = current

	// Unlink notifications that were members before but are not any more.
	for oldID := range previous {
		if _, kept := current[oldID]; kept {
			continue
		}
		groups := NotificationIDToGroups[oldID]
		delete(groups, ng.ID) // no-op when groups is nil
		if len(groups) == 0 {
			delete(NotificationIDToGroups, oldID)
		}
	}
}
// OnDeleteNotificationGroup removes the given notification groups from the
// in-memory maps.
//
// For every group it also removes the group ID from the reverse index
// NotificationIDToGroups. Without that, UpdateNotificationInList would later
// follow the stale group ID and assign into NotificationList[gid], which is a
// nil map once the group is deleted, and panic.
//
// Both locks are taken, in the same order as OnRefreshOrAddNotificationGroup,
// because NotificationGroup is read under NotificationGroupLock when mute
// labels are built.
func OnDeleteNotificationGroup(gids []uint64) {
	NotificationsLock.Lock()
	defer NotificationsLock.Unlock()
	NotificationGroupLock.Lock()
	defer NotificationGroupLock.Unlock()

	for _, gid := range gids {
		// Unlink every member notification from the group being removed.
		for nid := range NotificationList[gid] {
			groups := NotificationIDToGroups[nid]
			delete(groups, gid) // no-op when groups is nil
			if len(groups) == 0 {
				delete(NotificationIDToGroups, nid)
			}
		}
		delete(NotificationGroup, gid)
		delete(NotificationList, gid)
	}
}
// OnRefreshOrAddNotification 刷新通知方式相关参数
func OnRefreshOrAddNotification(n *model.Notification) {
notificationsLock.Lock()
defer notificationsLock.Unlock()
NotificationsLock.Lock()
defer NotificationsLock.Unlock()
var isEdit bool
if _, ok := NotificationIDToTag[n.ID]; ok {
_, ok := NotificationMap[n.ID]
if ok {
isEdit = true
}
if !isEdit {
@@ -69,47 +166,47 @@ func OnRefreshOrAddNotification(n *model.Notification) {
// AddNotificationToList adds a notification method to the in-memory maps.
func AddNotificationToList(n *model.Notification) {
// The Tag has no sub-map yet: create one for it before inserting.
if _, ok := NotificationList[n.Tag]; !ok {
NotificationList[n.Tag] = make(map[uint64]*model.Notification)
}
NotificationList[n.Tag][n.ID] = n
NotificationIDToTag[n.ID] = n.Tag
NotificationMap[n.ID] = n
}
// UpdateNotificationInList updates a notification method in the in-memory maps.
func UpdateNotificationInList(n *model.Notification) {
if n.Tag != NotificationIDToTag[n.ID] {
// The Tag changed: remove the existing mappings first.
delete(NotificationList[NotificationIDToTag[n.ID]], n.ID)
delete(NotificationIDToTag, n.ID)
// Then add the notification method to the maps under its new Tag.
AddNotificationToList(n)
} else {
// The Tag is unchanged: update the entry in place.
NotificationList[n.Tag][n.ID] = n
NotificationMap[n.ID] = n
// If the notification is already bound to notification groups, update their entries too.
if gids, ok := NotificationIDToGroups[n.ID]; ok {
for gid := range gids {
NotificationList[gid][n.ID] = n
}
}
}
// OnDeleteNotification removes a notification method from the in-memory maps.
func OnDeleteNotification(id uint64) {
notificationsLock.Lock()
defer notificationsLock.Unlock()
// OnDeleteNotification removes the given notification methods from the in-memory maps.
// NOTE(review): the original comment also mentions deleting from the table, but
// no database call is visible in this function — confirm against the caller.
func OnDeleteNotification(id []uint64) {
NotificationsLock.Lock()
defer NotificationsLock.Unlock()
delete(NotificationList[NotificationIDToTag[id]], id)
delete(NotificationIDToTag, id)
for _, i := range id {
delete(NotificationMap, i)
// Only notifications bound to at least one group need the group cleanup.
if gids, ok := NotificationIDToGroups[i]; ok {
for gid := range gids {
delete(NotificationList[gid], i)
delete(NotificationIDToGroups, i)
}
}
}
}
// UnMuteNotification deletes the cache entry stored under the full mute label,
// i.e. muteLabel with the notification group appended to it.
func UnMuteNotification(notificationTag string, muteLabel *string) {
fullMuteLabel := *NotificationMuteLabel.AppendNotificationTag(muteLabel, notificationTag)
func UnMuteNotification(notificationGroupID uint64, muteLabel *string) {
fullMuteLabel := *NotificationMuteLabel.AppendNotificationGroupName(muteLabel, notificationGroupID)
Cache.Delete(fullMuteLabel)
}
// SendNotification 向指定的通知方式组的所有通知方式发送通知
func SendNotification(notificationTag string, desc string, muteLabel *string, ext ...*model.Server) {
func SendNotification(notificationGroupID uint64, desc string, muteLabel *string, ext ...*model.Server) {
if muteLabel != nil {
// 将通知方式组名称加入静音标志
muteLabel := *NotificationMuteLabel.AppendNotificationTag(muteLabel, notificationTag)
muteLabel := *NotificationMuteLabel.AppendNotificationGroupName(muteLabel, notificationGroupID)
// 通知防骚扰策略
var flag bool
if cacheN, has := Cache.Get(muteLabel); has {
@@ -142,12 +239,12 @@ func SendNotification(notificationTag string, desc string, muteLabel *string, ex
}
}
// 向该通知方式组的所有通知方式发出通知
notificationsLock.RLock()
defer notificationsLock.RUnlock()
for _, n := range NotificationList[notificationTag] {
NotificationsLock.RLock()
defer NotificationsLock.RUnlock()
for _, n := range NotificationList[notificationGroupID] {
log.Println("NEZHA>> 尝试通知", n.Name)
}
for _, n := range NotificationList[notificationTag] {
for _, n := range NotificationList[notificationGroupID] {
ns := model.NotificationServerBundle{
Notification: n,
Server: nil,
@@ -183,8 +280,10 @@ func (_NotificationMuteLabel) ServerIncidentResolved(alertId uint64, serverId ui
return &label
}
// AppendNotificationTag returns a new label of the form "<label>:<notificationTag>".
func (_NotificationMuteLabel) AppendNotificationTag(label *string, notificationTag string) *string {
newLabel := fmt.Sprintf("%s:%s", *label, notificationTag)
// AppendNotificationGroupName returns a new label of the form "<label>:<group name>",
// where the group name is looked up in NotificationGroup by notificationGroupID.
// An ID with no entry yields an empty name, so the label ends with ":".
func (_NotificationMuteLabel) AppendNotificationGroupName(label *string, notificationGroupID uint64) *string {
// Hold the read lock while NotificationGroup is read.
NotificationGroupLock.RLock()
defer NotificationGroupLock.RUnlock()
newLabel := fmt.Sprintf("%s:%s", *label, NotificationGroup[notificationGroupID])
return &newLabel
}

View File

@@ -186,11 +186,6 @@ func (ss *ServiceSentinel) loadMonitorHistory() {
defer ss.monitorsLock.Unlock()
for i := 0; i < len(monitors); i++ {
// 旧版本可能不存在通知组 为其设置默认组
if monitors[i].NotificationTag == "" {
monitors[i].NotificationTag = "default"
DB.Save(monitors[i])
}
task := *monitors[i]
// 通过cron定时将服务监控任务传递给任务调度管道
monitors[i].CronJobID, err = Cron.AddFunc(task.CronSpec(), func() {
@@ -432,7 +427,7 @@ func (ss *ServiceSentinel) worker() {
if mh.Delay > 0 {
ss.monitorsLock.RLock()
if ss.monitors[mh.GetId()].LatencyNotify {
notificationTag := ss.monitors[mh.GetId()].NotificationTag
notificationGroupID := ss.monitors[mh.GetId()].NotificationGroupID
minMuteLabel := NotificationMuteLabel.ServiceLatencyMin(mh.GetId())
maxMuteLabel := NotificationMuteLabel.ServiceLatencyMax(mh.GetId())
if mh.Delay > ss.monitors[mh.GetId()].MaxLatency {
@@ -440,19 +435,19 @@ func (ss *ServiceSentinel) worker() {
ServerLock.RLock()
reporterServer := ServerList[r.Reporter]
msg := fmt.Sprintf("[Latency] %s %2f > %2f, Reporter: %s", ss.monitors[mh.GetId()].Name, mh.Delay, ss.monitors[mh.GetId()].MaxLatency, reporterServer.Name)
go SendNotification(notificationTag, msg, minMuteLabel)
go SendNotification(notificationGroupID, msg, minMuteLabel)
ServerLock.RUnlock()
} else if mh.Delay < ss.monitors[mh.GetId()].MinLatency {
// 延迟低于最小值
ServerLock.RLock()
reporterServer := ServerList[r.Reporter]
msg := fmt.Sprintf("[Latency] %s %2f < %2f, Reporter: %s", ss.monitors[mh.GetId()].Name, mh.Delay, ss.monitors[mh.GetId()].MinLatency, reporterServer.Name)
go SendNotification(notificationTag, msg, maxMuteLabel)
go SendNotification(notificationGroupID, msg, maxMuteLabel)
ServerLock.RUnlock()
} else {
// 正常延迟, 清除静音缓存
UnMuteNotification(notificationTag, minMuteLabel)
UnMuteNotification(notificationTag, maxMuteLabel)
UnMuteNotification(notificationGroupID, minMuteLabel)
UnMuteNotification(notificationGroupID, maxMuteLabel)
}
}
ss.monitorsLock.RUnlock()
@@ -471,16 +466,16 @@ func (ss *ServiceSentinel) worker() {
ServerLock.RLock()
reporterServer := ServerList[r.Reporter]
notificationTag := ss.monitors[mh.GetId()].NotificationTag
notificationGroupID := ss.monitors[mh.GetId()].NotificationGroupID
notificationMsg := fmt.Sprintf("[%s] %s Reporter: %s, Error: %s", StatusCodeToString(stateCode), ss.monitors[mh.GetId()].Name, reporterServer.Name, mh.Data)
muteLabel := NotificationMuteLabel.ServiceStateChanged(mh.GetId())
// 状态变更时,清除静音缓存
if stateCode != lastStatus {
UnMuteNotification(notificationTag, muteLabel)
UnMuteNotification(notificationGroupID, muteLabel)
}
go SendNotification(notificationTag, notificationMsg, muteLabel)
go SendNotification(notificationGroupID, notificationMsg, muteLabel)
ServerLock.RUnlock()
}
@@ -515,14 +510,14 @@ func (ss *ServiceSentinel) worker() {
ss.monitorsLock.RLock()
if ss.monitors[mh.GetId()].Notify {
muteLabel := NotificationMuteLabel.ServiceSSL(mh.GetId(), "network")
go SendNotification(ss.monitors[mh.GetId()].NotificationTag, fmt.Sprintf("[SSL] Fetch cert info failed, %s %s", ss.monitors[mh.GetId()].Name, errMsg), muteLabel)
go SendNotification(ss.monitors[mh.GetId()].NotificationGroupID, fmt.Sprintf("[SSL] Fetch cert info failed, %s %s", ss.monitors[mh.GetId()].Name, errMsg), muteLabel)
}
ss.monitorsLock.RUnlock()
}
} else {
// 清除网络错误静音缓存
UnMuteNotification(ss.monitors[mh.GetId()].NotificationTag, NotificationMuteLabel.ServiceSSL(mh.GetId(), "network"))
UnMuteNotification(ss.monitors[mh.GetId()].NotificationGroupID, NotificationMuteLabel.ServiceSSL(mh.GetId(), "network"))
var newCert = strings.Split(mh.Data, "|")
if len(newCert) > 1 {
@@ -545,7 +540,7 @@ func (ss *ServiceSentinel) worker() {
ss.sslCertCache[mh.GetId()] = mh.Data
}
notificationTag := ss.monitors[mh.GetId()].NotificationTag
notificationGroupID := ss.monitors[mh.GetId()].NotificationGroupID
serviceName := ss.monitors[mh.GetId()].Name
ss.monitorsLock.Unlock()
@@ -562,7 +557,7 @@ func (ss *ServiceSentinel) worker() {
// 静音规则: 服务id+证书过期时间
// 用于避免多个监测点对相同证书同时报警
muteLabel := NotificationMuteLabel.ServiceSSL(mh.GetId(), fmt.Sprintf("expire_%s", expiresTimeStr))
go SendNotification(notificationTag, fmt.Sprintf("[SSL] %s %s", serviceName, errMsg), muteLabel)
go SendNotification(notificationGroupID, fmt.Sprintf("[SSL] %s %s", serviceName, errMsg), muteLabel)
}
// 证书变更提醒
@@ -572,7 +567,7 @@ func (ss *ServiceSentinel) worker() {
oldCert[0], expiresOld.Format("2006-01-02 15:04:05"), newCert[0], expiresNew.Format("2006-01-02 15:04:05"))
// 证书变更后会自动更新缓存,所以不需要静音
go SendNotification(notificationTag, fmt.Sprintf("[SSL] %s %s", serviceName, errMsg), nil)
go SendNotification(notificationGroupID, fmt.Sprintf("[SSL] %s %s", serviceName, errMsg), nil)
}
}
}

View File

@@ -65,7 +65,7 @@ func InitDBFromPath(path string) {
err = DB.AutoMigrate(model.Server{}, model.User{},
model.Notification{}, model.AlertRule{}, model.Monitor{},
model.MonitorHistory{}, model.Cron{}, model.Transfer{},
model.ApiToken{}, model.NAT{}, model.DDNSProfile{})
model.ApiToken{}, model.NAT{}, model.DDNSProfile{}, model.NotificationGroupNotification{})
if err != nil {
panic(err)
}