dash: HTTP等服务监控的故障/恢复报警

This commit is contained in:
naiba
2021-04-17 23:36:37 +08:00
parent e916ca06d0
commit 96be2330a9
18 changed files with 343 additions and 204 deletions

View File

@@ -3,7 +3,6 @@ package controller
import (
"errors"
"fmt"
"log"
"net/http"
"time"
@@ -80,79 +79,8 @@ func (p *commonPage) checkViewPassword(c *gin.Context) {
c.Next()
}
// ServiceItem holds the aggregated check statistics of a single monitor as
// rendered on the service status page. The fixed-size arrays cover 30 daily
// slots; the service handler writes today's data to index 29 and earlier days
// to lower indices.
type ServiceItem struct {
// Monitor is the monitor these statistics belong to.
Monitor model.Monitor
// TotalUp counts the successful history records in the loaded window.
TotalUp uint64
// TotalDown counts the failed history records in the loaded window.
TotalDown uint64
// CurrentUp counts successes among the most recent samples recorded today.
// NOTE(review): the handler's comment says 20 samples, but its loop bound
// appears to admit up to 21 — confirm.
CurrentUp uint64
// CurrentDown counts failures among the same most recent samples of today.
CurrentDown uint64
// Delay is the per-day running average of the delay of successful checks.
Delay *[30]float32
// Up is the per-day count of successful checks.
Up *[30]int
// Down is the per-day count of failed checks.
Down *[30]int
}
func (p *commonPage) service(c *gin.Context) {
var msm map[uint64]*ServiceItem
var cached bool
if _, has := c.Get(model.CtxKeyAuthorizedUser); !has {
data, has := dao.Cache.Get(model.CacheKeyServicePage)
if has {
log.Println("use cache")
msm = data.(map[uint64]*ServiceItem)
cached = true
}
}
if !cached {
msm = make(map[uint64]*ServiceItem)
var ms []model.Monitor
dao.DB.Find(&ms)
year, month, day := time.Now().Date()
today := time.Date(year, month, day, 0, 0, 0, 0, time.Local)
var mhs []model.MonitorHistory
dao.DB.Where("created_at >= ?", today.AddDate(0, 0, -29)).Find(&mhs)
for i := 0; i < len(ms); i++ {
msm[ms[i].ID] = &ServiceItem{
Monitor: ms[i],
Delay: &[30]float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
Up: &[30]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
Down: &[30]int{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
}
}
// 整合数据
todayStatus := make(map[uint64][]bool)
for i := 0; i < len(mhs); i++ {
dayIndex := 29
if mhs[i].CreatedAt.Before(today) {
dayIndex = 28 - (int(today.Sub(mhs[i].CreatedAt).Hours()) / 24)
} else {
todayStatus[mhs[i].MonitorID] = append(todayStatus[mhs[i].MonitorID], mhs[i].Successful)
}
if mhs[i].Successful {
msm[mhs[i].MonitorID].TotalUp++
msm[mhs[i].MonitorID].Delay[dayIndex] = (msm[mhs[i].MonitorID].Delay[dayIndex]*float32(msm[mhs[i].MonitorID].Up[dayIndex]) + mhs[i].Delay) / float32(msm[mhs[i].MonitorID].Up[dayIndex]+1)
msm[mhs[i].MonitorID].Up[dayIndex]++
} else {
msm[mhs[i].MonitorID].TotalDown++
msm[mhs[i].MonitorID].Down[dayIndex]++
}
}
// 当日最后 20 个采样作为当前状态
for _, m := range msm {
for i := len(todayStatus[m.Monitor.ID]) - 1; i >= 0 && i >= (len(todayStatus[m.Monitor.ID])-1-20); i-- {
if todayStatus[m.Monitor.ID][i] {
m.CurrentUp++
} else {
m.CurrentDown++
}
}
}
// 未登录人员缓存十分钟
dao.Cache.Set(model.CacheKeyServicePage, msm, time.Minute*10)
}
msm := dao.ServiceSentinelShared.LoadStats()
c.HTML(http.StatusOK, "theme-"+dao.Conf.Site.Theme+"/service", mygin.CommonEnvironment(c, gin.H{
"Title": "服务状态",
"Services": msm,

View File

@@ -73,6 +73,7 @@ func (ma *memberAPI) delete(c *gin.Context) {
case "monitor":
err = dao.DB.Delete(&model.Monitor{}, "id = ?", id).Error
if err == nil {
dao.ServiceSentinelShared.OnMonitorDelete(id)
err = dao.DB.Delete(&model.MonitorHistory{}, "monitor_id = ?", id).Error
}
case "cron":
@@ -194,6 +195,7 @@ type monitorForm struct {
Name string
Target string
Type uint8
Notify string
}
func (ma *memberAPI) addOrEditMonitor(c *gin.Context) {
@@ -205,6 +207,7 @@ func (ma *memberAPI) addOrEditMonitor(c *gin.Context) {
m.Target = mf.Target
m.Type = mf.Type
m.ID = mf.ID
m.Notify = mf.Notify == "on"
}
if err == nil {
if m.ID == 0 {
@@ -219,6 +222,8 @@ func (ma *memberAPI) addOrEditMonitor(c *gin.Context) {
Message: fmt.Sprintf("请求错误:%s", err),
})
return
} else {
dao.ServiceSentinelShared.OnMonitorUpdate()
}
c.JSON(http.StatusOK, model.Response{
Code: http.StatusOK,

View File

@@ -39,11 +39,9 @@ func (mp *memberPage) server(c *gin.Context) {
}
// monitor renders the "dashboard/monitor" template with the page title and
// the list of monitors to display.
func (mp *memberPage) monitor(c *gin.Context) {
// Load every monitor row from the database; the result of Find is not
// checked for an error here.
var monitors []model.Monitor
dao.DB.Find(&monitors)
c.HTML(http.StatusOK, "dashboard/monitor", mygin.CommonEnvironment(c, gin.H{
"Title": "服务监控",
// NOTE(review): two "Monitors" entries are visible below. This looks like
// the before/after pair of a diff (database query vs. the shared sentinel's
// Monitors()); a Go map literal cannot contain both — confirm which remains.
"Monitors": monitors,
"Monitors": dao.ServiceSentinelShared.Monitors(),
}))
}