Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(notifier): add emergency contacts monitor and worker #1089

Draft
wants to merge 3 commits into
base: feat/add-emergency-contacts-api
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 174 additions & 39 deletions cmd/notifier/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/moira-alert/moira/cmd"
"github.com/moira-alert/moira/notifier"
"github.com/moira-alert/moira/notifier/selfstate"
"github.com/moira-alert/moira/notifier/selfstate/heartbeat"
)

type config struct {
Expand Down Expand Up @@ -43,7 +44,7 @@ type notifierConfig struct {
// Senders configuration section. See https://moira.readthedocs.io/en/latest/installation/configuration.html for more explanation
Senders []map[string]interface{} `yaml:"senders"`
// Self state monitor configuration section. Note: no inner subscriptions are required. Its own notification mechanism will be used.
SelfState selfStateConfig `yaml:"moira_selfstate"`
Selfstate selfstateConfig `yaml:"moira_selfstate"`
// Web-UI uri prefix for trigger links in notifications. For example: with 'http://localhost' every notification will contain link like 'http://localhost/trigger/triggerId'
FrontURI string `yaml:"front_uri"`
// Timezone to use to convert ticks. Default is UTC. See https://golang.org/pkg/time/#LoadLocation for more details.
Expand All @@ -58,27 +59,118 @@ type notifierConfig struct {
SetLogLevel setLogLevelConfig `yaml:"set_log_level"`
}

type selfStateConfig struct {
// If true, Self state monitor will be enabled
Enabled bool `yaml:"enabled"`
// If true, Self state monitor will check remote checker status
RemoteTriggersEnabled bool `yaml:"remote_triggers_enabled"`
// Max Redis disconnect delay to send alert when reached
RedisDisconnectDelay string `yaml:"redis_disconect_delay"`
// Max Filter metrics receive delay to send alert when reached
LastMetricReceivedDelay string `yaml:"last_metric_received_delay"`
// Max Checker checks perform delay to send alert when reached
LastCheckDelay string `yaml:"last_check_delay"`
// Max Remote triggers Checker checks perform delay to send alert when reached
LastRemoteCheckDelay string `yaml:"last_remote_check_delay"`
// Contact list for Self state monitor alerts
Contacts []map[string]string `yaml:"contacts"`
// heartbeaterAlertConfig is the YAML `alert` section of a heartbeater. Both
// values are copied unchanged into heartbeat.AlertConfig by
// heartbeaterBaseConfig.getSettings.
type heartbeaterAlertConfig struct {
// Name of the alert (YAML key `name`).
Name string `yaml:"name"`
// Desc is the alert description (YAML key `desc`).
Desc string `yaml:"desc"`
}

// heartbeaterBaseConfig holds the YAML settings shared by every heartbeater
// config in this file: it is embedded by the database, filter, local checker,
// remote checker and notifier heartbeater configs.
type heartbeaterBaseConfig struct {
// Enabled is copied as-is into heartbeat.HeartbeaterBaseConfig.Enabled;
// presumably it switches this particular heartbeater on or off.
Enabled bool `yaml:"enabled"`
// NeedTurnOffNotifier is copied as-is into the heartbeat settings.
// NOTE(review): presumably asks the monitor to turn the notifier off when
// this heartbeat fails — confirm against the heartbeat package.
NeedTurnOffNotifier bool `yaml:"need_turn_off_notifier"`

// AlertCfg is the `alert` section: name and description of the alert.
AlertCfg heartbeaterAlertConfig `yaml:"alert"`
}

// getSettings maps the YAML-level base heartbeater options onto the
// heartbeat package representation. Every value is copied verbatim; no
// defaults are applied and nothing is validated here.
func (cfg heartbeaterBaseConfig) getSettings() heartbeat.HeartbeaterBaseConfig {
	// Build the nested alert section first so the final literal stays flat.
	alert := heartbeat.AlertConfig{
		Name: cfg.AlertCfg.Name,
		Desc: cfg.AlertCfg.Desc,
	}

	return heartbeat.HeartbeaterBaseConfig{
		Enabled:             cfg.Enabled,
		NeedTurnOffNotifier: cfg.NeedTurnOffNotifier,
		AlertCfg:            alert,
	}
}

// databaseHeartbeaterConfig is the YAML configuration of the database
// heartbeater (the `database` key of heartbeatsConfig).
type databaseHeartbeaterConfig struct {
	// Common heartbeater settings. The ",inline" flag is required: the YAML
	// decoder does not flatten an embedded struct on its own, so without it
	// `enabled`, `need_turn_off_notifier` and `alert` written next to
	// `redis_disconnect_delay` would never reach these fields.
	heartbeaterBaseConfig `yaml:",inline"`

	// RedisDisconnectDelay is a duration string (for example "30s") that
	// heartbeatsConfig.getSettings parses with to.Duration. It is the max
	// Redis disconnect delay to send alert when reached.
	RedisDisconnectDelay string `yaml:"redis_disconnect_delay"`
}

// filterHeartbeaterConfig is the YAML configuration of the filter
// heartbeater (the `filter` key of heartbeatsConfig).
type filterHeartbeaterConfig struct {
	// Common heartbeater settings. ",inline" makes the YAML decoder read
	// `enabled`, `need_turn_off_notifier` and `alert` from the same mapping
	// as `last_metric_received_delay`; an untagged embedded struct is not
	// flattened by the decoder.
	heartbeaterBaseConfig `yaml:",inline"`

	// MetricReceivedDelay is a duration string (for example "60s") that
	// heartbeatsConfig.getSettings parses with to.Duration. It is the max
	// Filter metrics receive delay to send alert when reached.
	MetricReceivedDelay string `yaml:"last_metric_received_delay"`
}

// localCheckerHeartbeaterConfig is the YAML configuration of the local
// checker heartbeater (the `local_checker` key of heartbeatsConfig).
type localCheckerHeartbeaterConfig struct {
	// Common heartbeater settings. ",inline" makes the YAML decoder read
	// `enabled`, `need_turn_off_notifier` and `alert` from the same mapping
	// as `last_check_delay`; an untagged embedded struct is not flattened by
	// the decoder.
	heartbeaterBaseConfig `yaml:",inline"`

	// LocalCheckDelay is a duration string (for example "60s") that
	// heartbeatsConfig.getSettings parses with to.Duration. It is the max
	// Checker checks perform delay to send alert when reached.
	LocalCheckDelay string `yaml:"last_check_delay"`
}

// remoteCheckerHeartbeaterConfig is the YAML configuration of the remote
// checker heartbeater (the `remote_checker` key of heartbeatsConfig).
type remoteCheckerHeartbeaterConfig struct {
	// Common heartbeater settings. ",inline" makes the YAML decoder read
	// `enabled`, `need_turn_off_notifier` and `alert` from the same mapping
	// as `last_remote_check_delay`; an untagged embedded struct is not
	// flattened by the decoder.
	heartbeaterBaseConfig `yaml:",inline"`

	// RemoteCheckDelay is a duration string (for example "300s") that
	// heartbeatsConfig.getSettings parses with to.Duration. It is the max
	// Remote triggers Checker checks perform delay to send alert when
	// reached.
	RemoteCheckDelay string `yaml:"last_remote_check_delay"`
}

// notifierHeartbeaterConfig is the YAML configuration of the notifier
// heartbeater (the `notifier` key of heartbeatsConfig). It has no settings
// beyond the common ones.
type notifierHeartbeaterConfig struct {
	// Common heartbeater settings. ",inline" makes the YAML decoder read
	// `enabled`, `need_turn_off_notifier` and `alert` directly from the
	// `notifier` mapping; an untagged embedded struct is not flattened by
	// the decoder, which would leave this config permanently at its zero
	// value.
	heartbeaterBaseConfig `yaml:",inline"`
}

// heartbeatsConfig is the YAML `heartbeats` section of a monitor: one
// sub-section per heartbeater. getSettings converts it into
// selfstate.HeartbeatsCfg.
type heartbeatsConfig struct {
// DatabaseCfg configures the database heartbeater (key `database`).
DatabaseCfg databaseHeartbeaterConfig `yaml:"database"`
// FilterCfg configures the filter heartbeater (key `filter`).
FilterCfg filterHeartbeaterConfig `yaml:"filter"`
// LocalCheckerCfg configures the local checker heartbeater (key `local_checker`).
LocalCheckerCfg localCheckerHeartbeaterConfig `yaml:"local_checker"`
// RemoteCheckerCfg configures the remote checker heartbeater (key `remote_checker`).
RemoteCheckerCfg remoteCheckerHeartbeaterConfig `yaml:"remote_checker"`
// NotifierCfg configures the notifier heartbeater (key `notifier`).
NotifierCfg notifierHeartbeaterConfig `yaml:"notifier"`
}

// getSettings translates the YAML heartbeats section into
// selfstate.HeartbeatsCfg. For every heartbeater the shared base options are
// converted through heartbeaterBaseConfig.getSettings, and the delay strings
// are parsed with to.Duration. No defaults are applied here.
func (cfg heartbeatsConfig) getSettings() selfstate.HeartbeatsCfg {
	var settings selfstate.HeartbeatsCfg

	database := cfg.DatabaseCfg
	settings.DatabaseCfg = heartbeat.DatabaseHeartbeaterConfig{
		HeartbeaterBaseConfig: database.heartbeaterBaseConfig.getSettings(),
		RedisDisconnectDelay:  to.Duration(database.RedisDisconnectDelay),
	}

	filter := cfg.FilterCfg
	settings.FilterCfg = heartbeat.FilterHeartbeaterConfig{
		HeartbeaterBaseConfig: filter.heartbeaterBaseConfig.getSettings(),
		MetricReceivedDelay:   to.Duration(filter.MetricReceivedDelay),
	}

	localChecker := cfg.LocalCheckerCfg
	settings.LocalCheckerCfg = heartbeat.LocalCheckerHeartbeaterConfig{
		HeartbeaterBaseConfig: localChecker.heartbeaterBaseConfig.getSettings(),
		LocalCheckDelay:       to.Duration(localChecker.LocalCheckDelay),
	}

	remoteChecker := cfg.RemoteCheckerCfg
	settings.RemoteCheckerCfg = heartbeat.RemoteCheckerHeartbeaterConfig{
		HeartbeaterBaseConfig: remoteChecker.heartbeaterBaseConfig.getSettings(),
		RemoteCheckDelay:      to.Duration(remoteChecker.RemoteCheckDelay),
	}

	// The notifier heartbeater carries only the shared base options.
	settings.NotifierCfg = heartbeat.NotifierHeartbeaterConfig{
		HeartbeaterBaseConfig: cfg.NotifierCfg.heartbeaterBaseConfig.getSettings(),
	}

	return settings
}

// monitorBaseConfig holds the YAML settings shared by the admin and the user
// selfstate monitors; it is embedded by adminMonitorConfig and
// userMonitorConfig.
type monitorBaseConfig struct {
// Enabled is copied into selfstate.MonitorBaseConfig.Enabled; the notifier
// also reports the selfstate monitor as enabled when either the admin or
// the user monitor has this flag set.
Enabled bool `yaml:"enabled"`
// HearbeatsCfg is the `heartbeats` section with per-heartbeater settings.
// NOTE(review): the field name is misspelled ("Hearbeats"); renaming it
// also requires updating getDefault and selfstateConfig.getSettings.
HearbeatsCfg heartbeatsConfig `yaml:"heartbeats"`
// Self state monitor alerting interval
// (duration string, parsed with to.Duration).
// NOTE(review): getDefault sets no value for this field, while the previous
// config defaulted it to "300s" — confirm an empty value is handled
// downstream.
NoticeInterval string `yaml:"notice_interval"`
// Self state monitor check interval
// (duration string, parsed with to.Duration).
// NOTE(review): the previous implementation fell back to 10 seconds when
// this was empty; selfstateConfig.getSettings no longer does — confirm a
// zero interval is handled downstream.
CheckInterval string `yaml:"check_interval"`
}

// adminMonitorConfig is the YAML configuration of the admin selfstate
// monitor (the `admin` key of monitorConfig).
type adminMonitorConfig struct {
	// Common monitor settings. ",inline" makes the YAML decoder read
	// `enabled`, `heartbeats`, `notice_interval` and `check_interval` from
	// the same mapping as `contacts`; an untagged embedded struct is not
	// flattened by the decoder, so the monitor could never be enabled from
	// the config file.
	monitorBaseConfig `yaml:",inline"`

	// AdminContacts is the contact list for the admin monitor alerts; it is
	// passed unchanged to selfstate.AdminMonitorConfig.
	AdminContacts []map[string]string `yaml:"contacts"`
}

// userMonitorConfig is the YAML configuration of the user selfstate monitor
// (the `user` key of monitorConfig). It has no settings beyond the common
// ones.
type userMonitorConfig struct {
	// Common monitor settings. ",inline" makes the YAML decoder read
	// `enabled`, `heartbeats`, `notice_interval` and `check_interval`
	// directly from the `user` mapping; an untagged embedded struct is not
	// flattened by the decoder, which would leave this config permanently at
	// its defaults.
	monitorBaseConfig `yaml:",inline"`
}

// monitorConfig is the YAML `monitor` section of the selfstate config. It
// groups two independently configured monitors.
type monitorConfig struct {
// AdminCfg configures the admin monitor, including its contact list (key `admin`).
AdminCfg adminMonitorConfig `yaml:"admin"`
// UserCfg configures the user monitor (key `user`).
UserCfg userMonitorConfig `yaml:"user"`
}

// selfstateConfig is the YAML `moira_selfstate` section of the notifier
// config. getSettings converts it into selfstate.Config.
type selfstateConfig struct {
// Monitor holds the admin and user monitor settings (key `monitor`).
Monitor monitorConfig `yaml:"monitor"`
}

func getDefault() config {
return config{
Redis: cmd.RedisConfig{
Expand All @@ -105,12 +197,49 @@ func getDefault() config {
SenderTimeout: "10s",
ResendingTimeout: "1:00",
ReschedulingDelay: "60s",
SelfState: selfStateConfig{
Enabled: false,
RedisDisconnectDelay: "30s",
LastMetricReceivedDelay: "60s",
LastCheckDelay: "60s",
NoticeInterval: "300s",
Selfstate: selfstateConfig{
Monitor: monitorConfig{
AdminCfg: adminMonitorConfig{
monitorBaseConfig: monitorBaseConfig{
Enabled: false,
HearbeatsCfg: heartbeatsConfig{
DatabaseCfg: databaseHeartbeaterConfig{
RedisDisconnectDelay: "30s",
},
FilterCfg: filterHeartbeaterConfig{
MetricReceivedDelay: "60s",
},
LocalCheckerCfg: localCheckerHeartbeaterConfig{
LocalCheckDelay: "60s",
},
RemoteCheckerCfg: remoteCheckerHeartbeaterConfig{
RemoteCheckDelay: "300s",
},
NotifierCfg: notifierHeartbeaterConfig{},
},
},
},
UserCfg: userMonitorConfig{
monitorBaseConfig: monitorBaseConfig{
Enabled: false,
HearbeatsCfg: heartbeatsConfig{
DatabaseCfg: databaseHeartbeaterConfig{
RedisDisconnectDelay: "30s",
},
FilterCfg: filterHeartbeaterConfig{
MetricReceivedDelay: "60s",
},
LocalCheckerCfg: localCheckerHeartbeaterConfig{
LocalCheckDelay: "60s",
},
RemoteCheckerCfg: remoteCheckerHeartbeaterConfig{
RemoteCheckDelay: "300s",
},
NotifierCfg: notifierHeartbeaterConfig{},
},
},
},
},
},
FrontURI: "http://localhost",
Timezone: "UTC",
Expand Down Expand Up @@ -188,9 +317,10 @@ func (config *notifierConfig) getSettings(logger moira.Logger) notifier.Config {
Int("subscriptions_count", len(subscriptions)).
Msg("Found dynamic log rules in config for some contacts and subscriptions")

selfstateMonitorEnabled := config.Selfstate.Monitor.AdminCfg.Enabled || config.Selfstate.Monitor.UserCfg.Enabled

return notifier.Config{
SelfStateEnabled: config.SelfState.Enabled,
SelfStateContacts: config.SelfState.Contacts,
SelfstateMonitorEnabled: selfstateMonitorEnabled,
SendingTimeout: to.Duration(config.SenderTimeout),
ResendingTimeout: to.Duration(config.ResendingTimeout),
ReschedulingDelay: to.Duration(config.ReschedulingDelay),
Expand All @@ -214,21 +344,26 @@ func checkDateTimeFormat(format string) error {
return nil
}

func (config *selfStateConfig) getSettings() selfstate.Config {
// 10 sec is default check value
checkInterval := 10 * time.Second
if config.CheckInterval != "" {
checkInterval = to.Duration(config.CheckInterval)
}

func (cfg *selfstateConfig) getSettings() selfstate.Config {
return selfstate.Config{
Enabled: config.Enabled,
RedisDisconnectDelaySeconds: int64(to.Duration(config.RedisDisconnectDelay).Seconds()),
LastMetricReceivedDelaySeconds: int64(to.Duration(config.LastMetricReceivedDelay).Seconds()),
LastCheckDelaySeconds: int64(to.Duration(config.LastCheckDelay).Seconds()),
LastRemoteCheckDelaySeconds: int64(to.Duration(config.LastRemoteCheckDelay).Seconds()),
CheckInterval: checkInterval,
Contacts: config.Contacts,
NoticeIntervalSeconds: int64(to.Duration(config.NoticeInterval).Seconds()),
Monitor: selfstate.MonitorConfig{
AdminCfg: selfstate.AdminMonitorConfig{
MonitorBaseConfig: selfstate.MonitorBaseConfig{
Enabled: cfg.Monitor.AdminCfg.Enabled,
HeartbeatsCfg: cfg.Monitor.AdminCfg.HearbeatsCfg.getSettings(),
NoticeInterval: to.Duration(cfg.Monitor.AdminCfg.NoticeInterval),
CheckInterval: to.Duration(cfg.Monitor.AdminCfg.CheckInterval),
},
AdminContacts: cfg.Monitor.AdminCfg.AdminContacts,
},
UserCfg: selfstate.UserMonitorConfig{
MonitorBaseConfig: selfstate.MonitorBaseConfig{
Enabled: cfg.Monitor.UserCfg.Enabled,
HeartbeatsCfg: cfg.Monitor.UserCfg.HearbeatsCfg.getSettings(),
NoticeInterval: to.Duration(cfg.Monitor.UserCfg.NoticeInterval),
CheckInterval: to.Duration(cfg.Monitor.UserCfg.CheckInterval),
},
},
},
}
}
26 changes: 11 additions & 15 deletions cmd/notifier/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import (
"github.com/moira-alert/moira/notifier"
"github.com/moira-alert/moira/notifier/events"
"github.com/moira-alert/moira/notifier/notifications"
"github.com/moira-alert/moira/notifier/selfstate"
selfstate "github.com/moira-alert/moira/notifier/selfstate/worker"
_ "go.uber.org/automaxprocs"
)

Expand Down Expand Up @@ -117,18 +117,14 @@ func main() {
Msg("Can not configure senders")
}

// Start moira self state checker
if config.Notifier.SelfState.getSettings().Enabled {
selfState := selfstate.NewSelfCheckWorker(logger, database, sender, config.Notifier.SelfState.getSettings(), metrics.ConfigureHeartBeatMetrics(telemetry.Metrics))
if err := selfState.Start(); err != nil {
logger.Fatal().
Error(err).
Msg("SelfState failed")
}
defer stopSelfStateChecker(selfState)
} else {
logger.Debug().Msg("Moira Self State Monitoring disabled")
selfstateCfg := config.Notifier.Selfstate.getSettings()
selfstateWorker, err := selfstate.NewSelfstateWorker(selfstateCfg, logger, database, sender, systemClock)
if err != nil {
logger.Fatal().
Error(err).
Msg("Failed to create a new selfstate worker")
}
defer stopSelfstateWorker(selfstateWorker)

// Start moira notification fetcher
fetchNotificationsWorker := &notifications.FetchNotificationsWorker{
Expand Down Expand Up @@ -181,10 +177,10 @@ func stopNotificationsFetcher(worker *notifications.FetchNotificationsWorker) {
}
}

func stopSelfStateChecker(checker *selfstate.SelfCheckWorker) {
if err := checker.Stop(); err != nil {
func stopSelfstateWorker(selfstateWorker selfstate.SelfstateWorker) {
if err := selfstateWorker.Stop(); err != nil {
logger.Error().
Error(err).
Msg("Failed to stop self check worker")
Msg("Failed to stop selfstate worker")
}
}
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ require (
require github.com/prometheus/common v0.37.0

require (
github.com/golang/mock v1.6.0
github.com/go-playground/validator/v10 v10.4.1
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/mattermost/mattermost/server/public v0.1.1
github.com/mitchellh/mapstructure v1.5.0
Expand Down Expand Up @@ -184,12 +184,15 @@ require (
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/spec v0.20.9 // indirect
github.com/go-openapi/swag v0.22.4 // indirect
github.com/go-playground/locales v0.13.0 // indirect
github.com/go-playground/universal-translator v0.17.0 // indirect
github.com/hashicorp/go-hclog v1.6.3 // indirect
github.com/hashicorp/go-plugin v1.6.0 // indirect
github.com/hashicorp/yamux v0.1.1 // indirect
github.com/huandu/xstrings v1.3.3 // indirect
github.com/imdario/mergo v0.3.11 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/leodido/go-urn v1.2.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mitchellh/copystructure v1.0.0 // indirect
github.com/mitchellh/go-testing-interface v1.14.1 // indirect
Expand Down
6 changes: 5 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -637,9 +637,13 @@ github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/
github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU=
github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
github.com/go-playground/assert/v2 v2.0.1 h1:MsBgLAaY856+nPRTKrp3/OZK38U/wa0CcBYNjji3q3A=
github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q=
github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8=
github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no=
github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA=
github.com/go-playground/validator/v10 v10.4.1 h1:pH2c5ADXtd66mxoE0Zm9SUhxE20r7aM3F26W0hOn+GE=
github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4=
github.com/go-redis/redis v6.15.9+incompatible h1:K0pv1D7EQUjfyoMql+r/jZqCLizCGKFlFgcHWWmHQjg=
github.com/go-redis/redis v6.15.9+incompatible/go.mod h1:NAIEuMOZ/fxfXJIrKDQDz8wamY7mA7PouImQ2Jvg6kA=
Expand Down Expand Up @@ -680,7 +684,6 @@ github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt
github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4=
github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8=
github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc=
github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
Expand Down Expand Up @@ -899,6 +902,7 @@ github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y=
github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
Expand Down
3 changes: 1 addition & 2 deletions notifier/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ const NotificationsLimitUnlimited = int64(-1)
// Config is sending settings including log settings.
type Config struct {
Enabled bool
SelfStateEnabled bool
SelfStateContacts []map[string]string
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

а куда делося?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Одно поле переименовал, а другое не использовалось

SelfstateMonitorEnabled bool
SendingTimeout time.Duration
ResendingTimeout time.Duration
ReschedulingDelay time.Duration
Expand Down
Loading
Loading