Skip to content

Commit

Permalink
Automatically disable unsupported checks
Browse files Browse the repository at this point in the history
  • Loading branch information
prymitive committed Jan 7, 2025
1 parent 6eb615f commit 23e7dcc
Show file tree
Hide file tree
Showing 26 changed files with 859 additions and 124 deletions.
16 changes: 16 additions & 0 deletions cmd/pint/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,22 @@ func checkRules(ctx context.Context, workers int, isOffline bool, gen *config.Pr
summary.OnlineChecks = onlineChecksCount.Load()
summary.OfflineChecks = offlineChecksCount.Load()

for _, prom := range gen.Servers() {
for api, names := range prom.GetDisabledChecks() {
summary.MarkCheckDisabled(prom.Name(), api, names)
}
}
for _, pd := range summary.GetPrometheusDetails() {
for _, dc := range pd.DisabledChecks {
slog.Warn(
"Some checks were disabled because configured server doesn't seem to support all Prometheus APIs",
slog.String("prometheus", pd.Name),
slog.String("api", dc.API),
slog.Any("checks", dc.Checks),
)
}
}

lastRunTime.SetToCurrentTime()

return summary, nil
Expand Down
7 changes: 5 additions & 2 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@

### Changed

- Report warnings instead of errors if Prometheus server used for checks doesn't support these API
endpoints:
- When pint runs online checks against Thanos or other service that implements some, but not all, Prometheus APIs
it would reports errors due to some API requests returning `404 Not Found` HTTP errors.
Now pint will automatically disable checks that rely on Prometheus API endpoints that are not supported
and create a summary comment (when running `pint ci`) that lists disabled checks.
This applies to these API endpoints:

- `/api/v1/status/config`
- `/api/v1/status/flags`
Expand Down
5 changes: 5 additions & 0 deletions internal/checks/alerts_absent.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package checks

import (
"context"
"errors"
"fmt"
"time"

Expand Down Expand Up @@ -72,6 +73,10 @@ func (c AlertsAbsentCheck) Check(ctx context.Context, _ discovery.Path, rule par

cfg, err := c.prom.Config(ctx, 0)
if err != nil {
if errors.Is(err, promapi.ErrUnsupported) {
c.prom.DisableCheck(promapi.APIPathConfig, c.Reporter())
return problems
}
text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Warning)
problems = append(problems, Problem{
Lines: rule.AlertingRule.Expr.Value.Lines,
Expand Down
14 changes: 1 addition & 13 deletions internal/checks/alerts_absent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -274,19 +274,7 @@ func TestAlertsAbsentCheck(t *testing.T) {
content: "- alert: foo\n expr: absent(foo)\n",
checker: newAlertsAbsentCheck,
prometheus: newSimpleProm,
problems: func(s string) []checks.Problem {
return []checks.Problem{
{
Lines: parser.LineRange{
First: 2,
Last: 2,
},
Reporter: checks.AlertsAbsentCheckName,
Text: checkUnsupported(checks.AlertsAbsentCheckName, "prom", s, promapi.APIPathConfig),
Severity: checks.Warning,
},
}
},
problems: noProblems,
mocks: []*prometheusMock{
{
conds: []requestCondition{requireConfigPath},
Expand Down
5 changes: 5 additions & 0 deletions internal/checks/alerts_external_labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package checks

import (
"context"
"errors"
"fmt"

"github.com/cloudflare/pint/internal/discovery"
Expand Down Expand Up @@ -55,6 +56,10 @@ func (c AlertsExternalLabelsCheck) Check(ctx context.Context, _ discovery.Path,

cfg, err := c.prom.Config(ctx, 0)
if err != nil {
if errors.Is(err, promapi.ErrUnsupported) {
c.prom.DisableCheck(promapi.APIPathConfig, c.Reporter())
return problems
}
text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Bug)
problems = append(problems, Problem{
Lines: rule.Lines,
Expand Down
14 changes: 14 additions & 0 deletions internal/checks/alerts_external_labels_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package checks_test

import (
"fmt"
"net/http"
"testing"
"time"

Expand Down Expand Up @@ -155,6 +156,19 @@ func TestAlertsExternalLabelsCountCheck(t *testing.T) {
},
},
},
{
description: "config 404",
content: content,
checker: newAlertsExternalLabelsCheck,
prometheus: newSimpleProm,
problems: noProblems,
mocks: []*prometheusMock{
{
conds: []requestCondition{requireConfigPath},
resp: httpResponse{code: http.StatusNotFound, body: "Not Found"},
},
},
},
}

runTests(t, testCases)
Expand Down
3 changes: 0 additions & 3 deletions internal/checks/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,6 @@ func textAndSeverityFromError(err error, reporter, prom string, s Severity) (tex
}

switch {
case promapi.IsUnsupportedError(err):
text = fmt.Sprintf("Couldn't run `%s` checks on %s because it's %s.", reporter, promDesc, err)
severity = Warning
case promapi.IsQueryTooExpensive(err):
text = fmt.Sprintf("Couldn't run `%s` checks on %s because some queries are too expensive: `%s`.", reporter, promDesc, err)
severity = Warning
Expand Down
4 changes: 0 additions & 4 deletions internal/checks/base_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,3 @@ func checkErrorUnableToRun(c, name, uri, err string) string {
func checkErrorTooExpensiveToRun(c, name, uri, err string) string {
return fmt.Sprintf("Couldn't run `%s` checks on `%s` Prometheus server at %s because some queries are too expensive: `%s`.", c, name, uri, err)
}

func checkUnsupported(c, name, uri, path string) string {
return fmt.Sprintf("Couldn't run `%s` checks on `%s` Prometheus server at %s because it's unsupported: this server doesn't seem to support `%s` API endpoint.", c, name, uri, path)
}
5 changes: 5 additions & 0 deletions internal/checks/labels_conflict.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package checks

import (
"context"
"errors"
"fmt"

"github.com/cloudflare/pint/internal/discovery"
Expand Down Expand Up @@ -56,6 +57,10 @@ func (c LabelsConflictCheck) Check(ctx context.Context, _ discovery.Path, rule p

cfg, err := c.prom.Config(ctx, 0)
if err != nil {
if errors.Is(err, promapi.ErrUnsupported) {
c.prom.DisableCheck(promapi.APIPathConfig, c.Reporter())
return problems
}
text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Warning)
problems = append(problems, Problem{
Lines: labels.Lines,
Expand Down
14 changes: 1 addition & 13 deletions internal/checks/labels_conflict_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,19 +183,7 @@ func TestLabelsConflictCheck(t *testing.T) {
content: "- record: foo\n expr: up == 0\n labels:\n foo: bar\n",
checker: newLabelsConflict,
prometheus: newSimpleProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Lines: parser.LineRange{
First: 3,
Last: 4,
},
Reporter: checks.LabelsConflictCheckName,
Text: checkUnsupported(checks.LabelsConflictCheckName, "prom", uri, promapi.APIPathConfig),
Severity: checks.Warning,
},
}
},
problems: noProblems,
mocks: []*prometheusMock{
{
conds: []requestCondition{requireConfigPath},
Expand Down
5 changes: 5 additions & 0 deletions internal/checks/promql_counter.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package checks

import (
"context"
"errors"
"fmt"

v1 "github.com/prometheus/client_golang/api/prometheus/v1"
Expand Down Expand Up @@ -99,6 +100,10 @@ LOOP:

metadata, err := c.prom.Metadata(ctx, selector.Name)
if err != nil {
if errors.Is(err, promapi.ErrUnsupported) {
c.prom.DisableCheck(promapi.APIPathMetadata, c.Reporter())
return problems
}
text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Warning)
problems = append(problems, Problem{
Lines: expr.Value.Lines,
Expand Down
5 changes: 5 additions & 0 deletions internal/checks/promql_range_query.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package checks

import (
"context"
"errors"
"fmt"
"time"

Expand Down Expand Up @@ -77,6 +78,10 @@ func (c RangeQueryCheck) Check(ctx context.Context, _ discovery.Path, rule parse

flags, err := c.prom.Flags(ctx)
if err != nil {
if errors.Is(err, promapi.ErrUnsupported) {
c.prom.DisableCheck(promapi.APIPathFlags, c.Reporter())
return problems
}
text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Warning)
problems = append(problems, Problem{
Lines: expr.Value.Lines,
Expand Down
14 changes: 1 addition & 13 deletions internal/checks/promql_range_query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,19 +99,7 @@ func TestRangeQueryCheck(t *testing.T) {
content: "- record: foo\n expr: rate(foo[30d])\n",
checker: newRangeQueryCheck,
prometheus: newSimpleProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Lines: parser.LineRange{
First: 2,
Last: 2,
},
Reporter: "promql/range_query",
Text: checkUnsupported(checks.RangeQueryCheckName, "prom", uri, promapi.APIPathFlags),
Severity: checks.Warning,
},
}
},
problems: noProblems,
mocks: []*prometheusMock{
{
conds: []requestCondition{requireFlagsPath},
Expand Down
11 changes: 11 additions & 0 deletions internal/checks/promql_rate.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package checks

import (
"context"
"errors"
"fmt"
"slices"
"time"
Expand Down Expand Up @@ -69,6 +70,10 @@ func (c RateCheck) Check(ctx context.Context, _ discovery.Path, rule parser.Rule

cfg, err := c.prom.Config(ctx, 0)
if err != nil {
if errors.Is(err, promapi.ErrUnsupported) {
c.prom.DisableCheck(promapi.APIPathConfig, c.Reporter())
return problems
}
text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Bug)
problems = append(problems, Problem{
Lines: expr.Value.Lines,
Expand Down Expand Up @@ -119,6 +124,9 @@ func (c RateCheck) checkNode(ctx context.Context, node *parser.PromQLNode, entri
done.values = append(done.values, s.Name)
metadata, err := c.prom.Metadata(ctx, s.Name)
if err != nil {
if errors.Is(err, promapi.ErrUnsupported) {
continue
}
text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Bug)
problems = append(problems, exprProblem{
text: text,
Expand Down Expand Up @@ -152,6 +160,9 @@ func (c RateCheck) checkNode(ctx context.Context, node *parser.PromQLNode, entri
for _, vs := range src.Selectors {
metadata, err := c.prom.Metadata(ctx, vs.Name)
if err != nil {
if errors.Is(err, promapi.ErrUnsupported) {
continue
}
text, severity := textAndSeverityFromError(err, c.Reporter(), c.prom.Name(), Bug)
problems = append(problems, exprProblem{
text: text,
Expand Down
88 changes: 79 additions & 9 deletions internal/checks/promql_rate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,15 +545,6 @@ func TestRateCheck(t *testing.T) {
Details: checks.RateCheckDetails,
Severity: checks.Bug,
},
{
Lines: parser.LineRange{
First: 2,
Last: 2,
},
Reporter: "promql/rate",
Text: checkUnsupported(checks.RateCheckName, "prom", uri, promapi.APIPathMetadata),
Severity: checks.Warning,
},
}
},
mocks: []*prometheusMock{
Expand Down Expand Up @@ -933,6 +924,85 @@ func TestRateCheck(t *testing.T) {
},
},
},
{
description: "config 404",
content: "- alert: my alert\n expr: rate(my:sum[5m])\n",
entries: mustParseContent("- record: my:sum\n expr: sum(foo)\n"),
checker: newRateCheck,
prometheus: newSimpleProm,
problems: noProblems,
mocks: []*prometheusMock{
{
conds: []requestCondition{requireConfigPath},
resp: httpResponse{code: http.StatusNotFound, body: "Not Found"},
},
},
},
{
description: "metadata 404",
content: `
- alert: Plexi_Worker_High_Signing_Latency
expr: |
sum(
rate(global:response_time_sum{namespace!~"test[.].+"}[15m])
) by (environment, namespace)
/
sum(
rate(global:response_time_count{namespace!~"test[.].+"}[15m])
) by (environment, namespace)
> 3000
`,
checker: newRateCheck,
prometheus: newSimpleProm,
entries: mustParseContent(`
- record: global:response_time_sum
expr: sum(response_time_sum:rate2m)
- record: response_time_sum:rate2m
expr: rate(response_time_sum[2m])
`),
problems: noProblems,
mocks: []*prometheusMock{
{
conds: []requestCondition{requireConfigPath},
resp: configResponse{yaml: "global:\n scrape_interval: 53s\n"},
},
{
conds: []requestCondition{
requireMetadataPath,
formCond{"metric", "global:response_time_sum"},
},
resp: metadataResponse{metadata: map[string][]v1.Metadata{}},
},
{
conds: []requestCondition{
requireMetadataPath,
formCond{"metric", "response_time_sum:rate2m"},
},
resp: httpResponse{code: http.StatusNotFound, body: "Not Found"},
},
},
},
{
description: "rate over non aggregate",
content: "- alert: my alert\n expr: rate(my:foo[5m])\n",
entries: mustParseContent("- record: my:foo\n expr: foo / foo\n"),
checker: newRateCheck,
prometheus: newSimpleProm,
problems: noProblems,
mocks: []*prometheusMock{
{
conds: []requestCondition{requireConfigPath},
resp: configResponse{yaml: "global:\n scrape_interval: 1m\n"},
},
{
conds: []requestCondition{
requireMetadataPath,
formCond{"metric", "my:foo"},
},
resp: metadataResponse{metadata: map[string][]v1.Metadata{}},
},
},
},
}
runTests(t, testCases)
}
Loading

0 comments on commit 23e7dcc

Please sign in to comment.