dash: HTTP等服务监控的故障/恢复报警

This commit is contained in:
naiba
2021-04-17 23:36:37 +08:00
parent e916ca06d0
commit 96be2330a9
18 changed files with 343 additions and 204 deletions

View File

@@ -3,8 +3,6 @@ package rpc
import (
"context"
"fmt"
"log"
"strings"
"time"
"github.com/naiba/nezha/model"
@@ -22,49 +20,9 @@ func (s *NezhaHandler) ReportTask(c context.Context, r *pb.TaskResult) (*pb.Rece
if clientID, err = s.Auth.Check(c); err != nil {
return nil, err
}
if r.GetType() == model.TaskTypeHTTPGET {
// SSL 证书报警
var errMsg string
if strings.HasPrefix(r.GetData(), "SSL证书错误") {
// 排除 i/o timeont、connection timeout、EOF 错误
if !strings.HasSuffix(r.GetData(), "timeout") &&
!strings.HasSuffix(r.GetData(), "EOF") &&
!strings.HasSuffix(r.GetData(), "timed out") {
errMsg = r.GetData()
}
} else {
var last model.MonitorHistory
var newCert = strings.Split(r.GetData(), "|")
if len(newCert) > 1 {
expiresNew, _ := time.Parse("2006-01-02 15:04:05 -0700 MST", newCert[1])
// 证书过期提醒
if expiresNew.Before(time.Now().AddDate(0, 0, 7)) {
errMsg = fmt.Sprintf(
"SSL证书将在七天内过期过期时间%s。",
expiresNew.Format("2006-01-02 15:04:05"))
}
// 证书变更提醒
if err := dao.DB.Where("monitor_id = ? AND data LIKE ?", r.GetId(), "%|%").Order("id DESC").First(&last).Error; err == nil {
var oldCert = strings.Split(last.Data, "|")
var expiresOld time.Time
if len(oldCert) > 1 {
expiresOld, _ = time.Parse("2006-01-02 15:04:05 -0700 MST", oldCert[1])
}
if last.Data != "" && oldCert[0] != newCert[0] && !expiresNew.Equal(expiresOld) {
errMsg = fmt.Sprintf(
"SSL证书变更%s, %s 过期;新:%s, %s 过期。",
oldCert[0], expiresOld.Format("2006-01-02 15:04:05"), newCert[0], expiresNew.Format("2006-01-02 15:04:05"))
}
}
}
}
if errMsg != "" {
var monitor model.Monitor
dao.DB.First(&monitor, "id = ?", r.GetId())
dao.SendNotification(fmt.Sprintf("服务监控:%s %s", monitor.Name, errMsg), true)
}
}
if r.GetType() == model.TaskTypeCommand {
if r.GetType() != model.TaskTypeCommand {
dao.ServiceSentinelShared.Dispatch(r)
} else {
// 处理上报的计划任务
dao.CronLock.RLock()
defer dao.CronLock.RUnlock()
@@ -81,12 +39,6 @@ func (s *NezhaHandler) ReportTask(c context.Context, r *pb.TaskResult) (*pb.Rece
LastResult: r.GetSuccessful(),
})
}
} else {
// 存入历史记录
mh := model.PB2MonitorHistory(r)
if err := dao.DB.Create(&mh).Error; err != nil {
return nil, err
}
}
return &pb.Receipt{Proced: true}, nil
}
@@ -102,10 +54,7 @@ func (s *NezhaHandler) RequestTask(h *pb.Host, stream pb.NezhaService_RequestTas
dao.ServerList[clientID].TaskStream = stream
dao.ServerList[clientID].TaskClose = closeCh
dao.ServerLock.RUnlock()
select {
case err = <-closeCh:
return err
}
return <-closeCh
}
func (s *NezhaHandler) ReportSystemState(c context.Context, r *pb.State) (*pb.Receipt, error) {
@@ -131,7 +80,6 @@ func (s *NezhaHandler) ReportSystemInfo(c context.Context, r *pb.Host) (*pb.Rece
host := model.PB2Host(r)
dao.ServerLock.RLock()
defer dao.ServerLock.RUnlock()
log.Println(dao.Conf.IgnoredIPNotificationServerIDs)
if dao.Conf.EnableIPChangeNotification &&
dao.Conf.IgnoredIPNotificationServerIDs[clientID] != struct{}{} &&
dao.ServerList[clientID].Host != nil &&