diff --git a/checker/check.go b/checker/check.go index 698869cb8..db3e90c6c 100644 --- a/checker/check.go +++ b/checker/check.go @@ -132,24 +132,17 @@ func (triggerChecker *TriggerChecker) handleFetchError(checkData moira.CheckData triggerChecker.trigger.ClusterKey(), ) } - case remote.ErrRemoteTriggerResponse: - timeSinceLastSuccessfulCheck := checkData.Timestamp - checkData.LastSuccessfulCheckTimestamp - if timeSinceLastSuccessfulCheck >= triggerChecker.ttl { - checkData.State = moira.StateEXCEPTION - checkData.Message = fmt.Sprintf("Remote server unavailable. Trigger is not checked for %d seconds", timeSinceLastSuccessfulCheck) - - var comparingErr error - checkData, comparingErr = triggerChecker.compareTriggerStates(checkData) - if comparingErr != nil { - triggerChecker.logger.Error(). - Error(comparingErr). - String(moira.LogFieldNameTriggerID, triggerChecker.triggerID). - Msg("cannot compare trigger states") - } - } + case remote.ErrRemoteUnavailable: + checkData.State = moira.StateEXCEPTION + checkData.Message = fmt.Sprintf("Remote server unavailable. Trigger is not checked since: %v", checkData.LastSuccessfulCheckTimestamp) - logTriggerCheckException(triggerChecker.logger, triggerChecker.triggerID, err) - case local.ErrUnknownFunction, local.ErrEvalExpr: + triggerChecker.logger.Error(). + String(moira.LogFieldNameTriggerID, triggerChecker.triggerID). + String("trigger.cluster_id", triggerChecker.trigger.ClusterId.String()). + String("trigger.source", triggerChecker.trigger.TriggerSource.String()). + Error(err). + Msg("Trigger check failed") + case local.ErrUnknownFunction, local.ErrEvalExpr, remote.ErrRemoteTriggerResponse: checkData.State = moira.StateEXCEPTION checkData.Message = err.Error() logTriggerCheckException(triggerChecker.logger, triggerChecker.triggerID, err) diff --git a/checker/check_test.go b/checker/check_test.go index 8a5ae0b1e..68d26e15c 100644 --- a/checker/check_test.go +++ b/checker/check_test.go @@ -1,6 +1,7 @@ package checker import ( + "errors" "fmt" "math" "testing" @@ -12,6 +13,7 @@ import ( logging "github.com/moira-alert/moira/logging/zerolog_adapter" metricSource "github.com/moira-alert/moira/metric_source" "github.com/moira-alert/moira/metric_source/local" + "github.com/moira-alert/moira/metric_source/remote" "go.uber.org/mock/gomock" "github.com/moira-alert/moira/metrics" @@ -1924,3 +1926,242 @@ func TestTriggerChecker_handlePrepareError(t *testing.T) { }) }) } + +func TestTriggerChecker_handleFetchError(t *testing.T) { + Convey("Test handleFetchError", t, func() { + mockCtrl := gomock.NewController(t) + defer mockCtrl.Finish() + dataBase := mock_moira_alert.NewMockDatabase(mockCtrl) + logger, _ := logging.GetLogger("Test") + + metric := "some.metric" + testTime := time.Date(2022, time.June, 6, 10, 0, 0, 0, time.UTC).Unix() + + trigger := &moira.Trigger{ + ID: "test trigger", + TriggerSource: moira.GraphiteLocal, + ClusterId: moira.DefaultCluster, + } + triggerChecker := TriggerChecker{ + triggerID: trigger.ID, + from: testTime - 50, + until: testTime, + trigger: trigger, + database: dataBase, + logger: logger, + ttlState: moira.TTLStateNODATA, + lastCheck: &moira.CheckData{ + State: moira.StateOK, + Timestamp: testTime - 10, + Metrics: map[string]moira.MetricState{ + metric: { + State: moira.StateOK, + Timestamp: testTime - 41, + }, + }, + }, + } + + Convey("with ErrTriggerHasEmptyTargets, ErrTriggerHasOnlyWildcards", func() { + errorList := []error{ + ErrTriggerHasEmptyTargets{}, + ErrTriggerHasOnlyWildcards{}, + } + + Convey("when triggerChecker.ttl == 0", func() { + triggerChecker.ttl = 0 + + for i, givenErr := range errorList { + Convey(fmt.Sprintf("Case %v: %T", i+1, givenErr), func() { + expectedCheckData := moira.CheckData{ + Score: int64(1_000), + Metrics: triggerChecker.lastCheck.Metrics, + State: triggerChecker.ttlState.ToTriggerState(), + Timestamp: triggerChecker.until, + Message: givenErr.Error(), + MetricsToTargetRelation: map[string]string{}, + } + + dataBase.EXPECT().SetTriggerLastCheck( + triggerChecker.triggerID, + &expectedCheckData, + triggerChecker.trigger.ClusterKey(), + ).Return(nil).Times(1) + + err := triggerChecker.handleFetchError(newCheckData(triggerChecker.lastCheck, triggerChecker.until), givenErr) + So(err, ShouldBeNil) + }) + } + }) + + Convey("when triggerChecker.ttl != 0", func() { + triggerChecker.ttl = 600 + + for i, givenErr := range errorList { + Convey(fmt.Sprintf("Case %v: %T", i+1, givenErr), func() { + expectedCheckData := moira.CheckData{ + Score: int64(1_000), + Metrics: triggerChecker.lastCheck.Metrics, + State: triggerChecker.ttlState.ToTriggerState(), + Timestamp: triggerChecker.until, + EventTimestamp: triggerChecker.until, + Message: givenErr.Error(), + MetricsToTargetRelation: map[string]string{}, + } + + dataBase.EXPECT().PushNotificationEvent( + &moira.NotificationEvent{ + IsTriggerEvent: true, + TriggerID: triggerChecker.triggerID, + State: triggerChecker.ttlState.ToTriggerState(), + OldState: triggerChecker.lastCheck.State, + Timestamp: triggerChecker.until, + Metric: triggerChecker.trigger.Name, + }, + true, + ).Return(nil).Times(1) + dataBase.EXPECT().SetTriggerLastCheck( + triggerChecker.triggerID, + &expectedCheckData, + triggerChecker.trigger.ClusterKey(), + ).Return(nil).Times(1) + + err := triggerChecker.handleFetchError(newCheckData(triggerChecker.lastCheck, triggerChecker.until), givenErr) + So(err, ShouldBeNil) + }) + } + }) + }) + + Convey("with graphite remote unavailable", func() { + givenErr := remote.ErrRemoteUnavailable{ + InternalError: errors.New("some err"), + } + + triggerChecker.ttl = 10 + triggerChecker.lastCheck.LastSuccessfulCheckTimestamp = triggerChecker.until - 20 + + expectedCheckData := moira.CheckData{ + Score: int64(100_000), + Metrics: triggerChecker.lastCheck.Metrics, + State: moira.StateEXCEPTION, + Timestamp: triggerChecker.until, + EventTimestamp: triggerChecker.until, + LastSuccessfulCheckTimestamp: triggerChecker.lastCheck.LastSuccessfulCheckTimestamp, + Message: fmt.Sprintf( + "Remote server unavailable. Trigger is not checked since: %v", + triggerChecker.lastCheck.LastSuccessfulCheckTimestamp), + MetricsToTargetRelation: map[string]string{}, + } + + dataBase.EXPECT().PushNotificationEvent( + &moira.NotificationEvent{ + IsTriggerEvent: true, + TriggerID: triggerChecker.triggerID, + State: moira.StateEXCEPTION, + OldState: triggerChecker.lastCheck.State, + Timestamp: triggerChecker.until, + Metric: triggerChecker.trigger.Name, + }, + true, + ).Return(nil).Times(1) + dataBase.EXPECT().SetTriggerLastCheck( + triggerChecker.triggerID, + &expectedCheckData, + triggerChecker.trigger.ClusterKey(), + ).Return(nil).Times(1) + + err := triggerChecker.handleFetchError(newCheckData(triggerChecker.lastCheck, triggerChecker.until), givenErr) + So(err, ShouldBeNil) + }) + + Convey("with bad functions, problems in expressions, etc", func() { + errorsList := []error{ + local.ErrorUnknownFunction(errors.New("unknown func \"dance\"")), + local.ErrorEvalExpression(errors.New("eval expr"), "badExpr(dancing.mops"), + remote.ErrRemoteTriggerResponse{ + InternalError: errors.New("user write bad target"), + Target: "bad target", + }, + } + + for i, givenErr := range errorsList { + Convey(fmt.Sprintf("Case %v: %T", i+1, givenErr), func() { + expectedCheckData := moira.CheckData{ + Score: int64(100_000), + Metrics: triggerChecker.lastCheck.Metrics, + State: moira.StateEXCEPTION, + Timestamp: triggerChecker.until, + EventTimestamp: triggerChecker.until, + LastSuccessfulCheckTimestamp: triggerChecker.lastCheck.LastSuccessfulCheckTimestamp, + Message: givenErr.Error(), + MetricsToTargetRelation: map[string]string{}, + } + + dataBase.EXPECT().PushNotificationEvent( + &moira.NotificationEvent{ + IsTriggerEvent: true, + TriggerID: triggerChecker.triggerID, + State: moira.StateEXCEPTION, + OldState: triggerChecker.lastCheck.State, + Timestamp: triggerChecker.until, + Metric: triggerChecker.trigger.Name, + }, + true, + ).Return(nil).Times(1) + dataBase.EXPECT().SetTriggerLastCheck( + triggerChecker.triggerID, + &expectedCheckData, + triggerChecker.trigger.ClusterKey(), + ).Return(nil).Times(1) + + err := triggerChecker.handleFetchError(newCheckData(triggerChecker.lastCheck, triggerChecker.until), givenErr) + So(err, ShouldBeNil) + }) + } + }) + + Convey("with undefined err", func() { + givenErr := errors.New("some undefined error") + + checkMetrics, err := metrics.ConfigureCheckerMetrics( + metrics.NewDummyRegistry(), + []moira.ClusterKey{moira.DefaultLocalCluster}, + ).GetCheckMetricsBySource(moira.DefaultLocalCluster) + So(err, ShouldBeNil) + + triggerChecker.metrics = checkMetrics + + expectedCheckData := moira.CheckData{ + Score: int64(100_000), + Metrics: triggerChecker.lastCheck.Metrics, + State: moira.StateEXCEPTION, + Timestamp: triggerChecker.until, + EventTimestamp: triggerChecker.until, + LastSuccessfulCheckTimestamp: triggerChecker.lastCheck.LastSuccessfulCheckTimestamp, + Message: givenErr.Error(), + MetricsToTargetRelation: map[string]string{}, + } + + dataBase.EXPECT().PushNotificationEvent( + &moira.NotificationEvent{ + IsTriggerEvent: true, + TriggerID: triggerChecker.triggerID, + State: moira.StateEXCEPTION, + OldState: triggerChecker.lastCheck.State, + Timestamp: triggerChecker.until, + Metric: triggerChecker.trigger.Name, + }, + true, + ).Return(nil).Times(1) + dataBase.EXPECT().SetTriggerLastCheck( + triggerChecker.triggerID, + &expectedCheckData, + triggerChecker.trigger.ClusterKey(), + ).Return(nil).Times(1) + + err = triggerChecker.handleFetchError(newCheckData(triggerChecker.lastCheck, triggerChecker.until), givenErr) + So(err, ShouldBeNil) + }) + }) +} diff --git a/metric_source/local/errors.go b/metric_source/local/errors.go index b9605024f..da14cefd9 100644 --- a/metric_source/local/errors.go +++ b/metric_source/local/errors.go @@ -58,6 +58,14 @@ type ErrEvalExpr struct { target string } +// ErrorEvalExpression creates ErrEvalExpr with given err and target. +func ErrorEvalExpression(err error, target string) ErrEvalExpr { + return ErrEvalExpr{ + internalError: err, + target: target, + } +} + // Error is implementation of golang error interface for ErrEvalExpr struct. func (err ErrEvalExpr) Error() string { return fmt.Sprintf("failed to evaluate target '%s': %s", err.target, err.internalError.Error()) diff --git a/metric_source/local/eval.go b/metric_source/local/eval.go index 4b429d189..5b4dacf47 100644 --- a/metric_source/local/eval.go +++ b/metric_source/local/eval.go @@ -95,10 +95,7 @@ func (eval *evaluator) Eval( } else if isErrUnknownFunction(err) { err = ErrorUnknownFunction(err) } else { - err = ErrEvalExpr{ - target: exp.ToString(), - internalError: err, - } + err = ErrorEvalExpression(err, exp.ToString()) } }