From 955663f5fb3bc4673d6766123412d37032ca7bd2 Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Fri, 20 Oct 2023 18:00:25 +0100 Subject: [PATCH] Dynamically generate the list of Prometheus servers --- cmd/pint/ci.go | 14 +- cmd/pint/lint.go | 14 +- cmd/pint/main.go | 6 - cmd/pint/metrics.go | 2 + cmd/pint/scan.go | 4 +- cmd/pint/tests/0001_match_path.txt | 1 + cmd/pint/tests/0002_nothing_to_lint.txt | 1 + cmd/pint/tests/0003_lint_workdir.txt | 1 + cmd/pint/tests/0004_fail_invalid_yaml.txt | 1 + cmd/pint/tests/0005_false_positive.txt | 1 + cmd/pint/tests/0006_rr_labels.txt | 1 + cmd/pint/tests/0007_alerts.txt | 1 + .../tests/0008_recording_rule_prometheus.txt | 1 + .../tests/0009_alerting_rule_prometheus.txt | 1 + cmd/pint/tests/0010_syntax_check.txt | 1 + cmd/pint/tests/0011_ignore_rules.txt | 1 + cmd/pint/tests/0012_issue_20.txt | 1 + cmd/pint/tests/0014_issue49_2.txt | 1 + cmd/pint/tests/0018_match_alerting.txt | 2 + cmd/pint/tests/0019_match_recording.txt | 2 + cmd/pint/tests/0020_ignore_kind.txt | 2 + cmd/pint/tests/0022_ignore_multi.txt | 1 + cmd/pint/tests/0024_color_output.txt | 1 + cmd/pint/tests/0027_ci_branch.txt | 1 + cmd/pint/tests/0028_ci_git_error.txt | 6 +- cmd/pint/tests/0029_ci_too_many_commits.txt | 1 + cmd/pint/tests/0037_disable_checks.txt | 5 +- cmd/pint/tests/0038_disable_checks_regex.txt | 1 + cmd/pint/tests/0039_prom_selected_path.txt | 5 +- cmd/pint/tests/0040_rule_match_label.txt | 2 + cmd/pint/tests/0042_watch_metrics.txt | 14 - cmd/pint/tests/0052_match_multiple.txt | 2 + cmd/pint/tests/0053_ignore_multiple.txt | 2 + cmd/pint/tests/0058_templated_check.txt | 1 + cmd/pint/tests/0060_ci_noop.txt | 1 + cmd/pint/tests/0063_lint_offline.txt | 2 + cmd/pint/tests/0066_lint_owner.txt | 1 + cmd/pint/tests/0067_relaxed.txt | 1 + cmd/pint/tests/0073_lint_k8s.txt | 1 + cmd/pint/tests/0074_strict_error.txt | 1 + cmd/pint/tests/0077_strict_error_owner.txt | 1 + cmd/pint/tests/0078_repeated_group.txt | 1 + cmd/pint/tests/0080_lint_online.txt | 2 + 
cmd/pint/tests/0081_rulefmt.txt | 1 + .../tests/0086_rulefmt_ignored_errors.txt | 1 + cmd/pint/tests/0087_dedup.txt | 1 + cmd/pint/tests/0088_rule_link.txt | 1 + cmd/pint/tests/0089_lint_min_severity_bug.txt | 1 + .../tests/0090_lint_min_severity_info.txt | 1 + .../tests/0091_lint_min_severity_invalid.txt | 1 + cmd/pint/tests/0092_dir_symlink.txt | 2 + cmd/pint/tests/0095_rulefmt_symlink.txt | 2 + cmd/pint/tests/0096_bad_symlink.txt | 1 + cmd/pint/tests/0099_symlink_outside_glob.txt | 2 + cmd/pint/tests/0103_file_disable.txt | 5 +- cmd/pint/tests/0108_rule_duplicate.txt | 2 + ..._rule_duplicate_multiple_proms_include.txt | 3 + ..._rule_duplicate_multiple_proms_exclude.txt | 3 + cmd/pint/tests/0111_snooze.txt | 2 + cmd/pint/tests/0112_expired_snooze.txt | 2 + cmd/pint/tests/0115_file_disable_tag.txt | 5 +- cmd/pint/tests/0116_file_snooze.txt | 2 + cmd/pint/tests/0121_rule_for.txt | 1 + cmd/pint/tests/0122_lint_owner_allowed.txt | 1 + cmd/pint/tests/0124_ci_base_branch_flag.txt | 1 + cmd/pint/tests/0125_lint_fail_on_warning.txt | 1 + cmd/pint/tests/0126_lint_fail_on_invalid.txt | 1 + ...127_lint_fail_on_fatal_but_got_warning.txt | 1 + .../tests/0128_lint_fail_on_warning_only.txt | 1 + .../tests/0134_ci_base_branch_flag_path.txt | 6 +- .../tests/0135_ci_base_branch_config_path.txt | 6 +- cmd/pint/tests/0136_annotation_regex_key.txt | 1 + .../tests/0137_annotation_regex_key_fail.txt | 1 + .../0138_annoation_regex_key_required.txt | 1 + cmd/pint/tests/0141_empty_keys.txt | 1 + cmd/pint/tests/0142_keep_firing_for.txt | 1 + cmd/pint/tests/0143_keep_firing_for.txt | 1 + cmd/pint/watch.go | 54 +- docs/examples/discovery.hcl | 39 + internal/checks/base_test.go | 6 +- .../config/__snapshots__/config_test.snap | 1478 ++++++++--------- internal/config/config.go | 79 +- internal/config/config_test.go | 27 +- internal/config/discovery.go | 253 +++ internal/config/prometheus.go | 185 ++- internal/discovery/git_blame.go | 2 + internal/discovery/git_branch.go | 2 + 
internal/discovery/glob.go | 3 + internal/log/handler.go | 8 +- internal/log/log_test.go | 48 + internal/promapi/config_test.go | 6 +- internal/promapi/failover.go | 44 +- internal/promapi/flags_test.go | 6 +- internal/promapi/metadata_test.go | 6 +- internal/promapi/metrics.go | 8 +- internal/promapi/prometheus.go | 5 + internal/promapi/query_test.go | 6 +- internal/promapi/range_test.go | 6 +- 98 files changed, 1506 insertions(+), 940 deletions(-) create mode 100644 docs/examples/discovery.hcl create mode 100644 internal/config/discovery.go create mode 100644 internal/log/log_test.go diff --git a/cmd/pint/ci.go b/cmd/pint/ci.go index 487d8005..1460b6ca 100644 --- a/cmd/pint/ci.go +++ b/cmd/pint/ci.go @@ -100,13 +100,17 @@ func actionCI(c *cli.Context) error { return err } - for _, prom := range meta.cfg.PrometheusServers { - prom.StartWorkers() + ctx := context.WithValue(context.Background(), config.CommandKey, config.CICommand) + + gen := config.NewPrometheusGenerator(meta.cfg, metricsRegistry) + defer gen.Stop() + + if err = gen.Discover(ctx); err != nil { + return err } - defer meta.cleanup() + slog.Debug("Generated all Prometheus servers", slog.Int("count", gen.Count())) - ctx := context.WithValue(context.Background(), config.CommandKey, config.CICommand) - summary := checkRules(ctx, meta.workers, meta.cfg, entries) + summary := checkRules(ctx, meta.workers, gen, meta.cfg, entries) if c.Bool(requireOwnerFlag) { summary.Report(verifyOwners(entries, meta.cfg.Owners.CompileAllowed())...) 
diff --git a/cmd/pint/lint.go b/cmd/pint/lint.go index f51aea0f..61fad034 100644 --- a/cmd/pint/lint.go +++ b/cmd/pint/lint.go @@ -60,13 +60,17 @@ func actionLint(c *cli.Context) error { return err } - for _, prom := range meta.cfg.PrometheusServers { - prom.StartWorkers() + ctx := context.WithValue(context.Background(), config.CommandKey, config.LintCommand) + + gen := config.NewPrometheusGenerator(meta.cfg, metricsRegistry) + defer gen.Stop() + + if err = gen.Discover(ctx); err != nil { + return err } - defer meta.cleanup() + slog.Debug("Generated all Prometheus servers", slog.Int("count", gen.Count())) - ctx := context.WithValue(context.Background(), config.CommandKey, config.LintCommand) - summary := checkRules(ctx, meta.workers, meta.cfg, entries) + summary := checkRules(ctx, meta.workers, gen, meta.cfg, entries) if c.Bool(requireOwnerFlag) { summary.Report(verifyOwners(entries, meta.cfg.Owners.CompileAllowed())...) diff --git a/cmd/pint/main.go b/cmd/pint/main.go index 8192bdb0..af4dcac2 100644 --- a/cmd/pint/main.go +++ b/cmd/pint/main.go @@ -82,12 +82,6 @@ type actionMeta struct { workers int } -func (meta actionMeta) cleanup() { - for _, prom := range meta.cfg.PrometheusServers { - prom.Close() - } -} - func actionSetup(c *cli.Context) (meta actionMeta, err error) { err = initLogger(c.String(logLevelFlag), c.Bool(noColorFlag)) if err != nil { diff --git a/cmd/pint/metrics.go b/cmd/pint/metrics.go index 0ddbf7cd..9b5e5eb1 100644 --- a/cmd/pint/metrics.go +++ b/cmd/pint/metrics.go @@ -3,6 +3,8 @@ package main import "github.com/prometheus/client_golang/prometheus" var ( + metricsRegistry = prometheus.NewRegistry() + pintVersion = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "pint_version", diff --git a/cmd/pint/scan.go b/cmd/pint/scan.go index 2c607c89..ef462606 100644 --- a/cmd/pint/scan.go +++ b/cmd/pint/scan.go @@ -54,7 +54,7 @@ func tryDecodingYamlError(err error) (l int, s string) { return 1, s } -func checkRules(ctx context.Context, workers int, 
cfg config.Config, entries []discovery.Entry) (summary reporter.Summary) { +func checkRules(ctx context.Context, workers int, gen *config.PrometheusGenerator, cfg config.Config, entries []discovery.Entry) (summary reporter.Summary) { checkIterationChecks.Set(0) checkIterationChecksDone.Set(0) @@ -108,7 +108,7 @@ func checkRules(ctx context.Context, workers int, cfg config.Config, entries []d ) } - checkList := cfg.GetChecksForRule(ctx, entry.SourcePath, entry.Rule, entry.DisabledChecks) + checkList := cfg.GetChecksForRule(ctx, gen, entry.SourcePath, entry.Rule, entry.DisabledChecks) for _, check := range checkList { checkIterationChecks.Inc() check := check diff --git a/cmd/pint/tests/0001_match_path.txt b/cmd/pint/tests/0001_match_path.txt index a442aa5f..0f979abd 100644 --- a/cmd/pint/tests/0001_match_path.txt +++ b/cmd/pint/tests/0001_match_path.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0002.yml:2 Bug: job label is required and should be preserved when aggregating "^.+$" rules, remove job from without() (promql/aggregate) 2 | expr: sum(foo) without(job) diff --git a/cmd/pint/tests/0002_nothing_to_lint.txt b/cmd/pint/tests/0002_nothing_to_lint.txt index da1f46e8..c25539c4 100644 --- a/cmd/pint/tests/0002_nothing_to_lint.txt +++ b/cmd/pint/tests/0002_nothing_to_lint.txt @@ -4,4 +4,5 @@ pint.error --no-color lint rules cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] level=ERROR msg="Fatal error" err="no matching files" diff --git a/cmd/pint/tests/0003_lint_workdir.txt b/cmd/pint/tests/0003_lint_workdir.txt index 4330843c..bdc8200a 100644 --- a/cmd/pint/tests/0003_lint_workdir.txt +++ b/cmd/pint/tests/0003_lint_workdir.txt @@ -5,6 +5,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to 
check" paths=["rules"] rules/0001.yml:2 Warning: job label is required and should be preserved when aggregating "^.+$" rules, remove job from without() (promql/aggregate) 2 | expr: sum(rate(fl_cf_html_bytes_in[10m])) WITHOUT (colo_id, instance, node_type, region, node_status, job, colo_name) diff --git a/cmd/pint/tests/0004_fail_invalid_yaml.txt b/cmd/pint/tests/0004_fail_invalid_yaml.txt index b885b17b..2ff38c44 100644 --- a/cmd/pint/tests/0004_fail_invalid_yaml.txt +++ b/cmd/pint/tests/0004_fail_invalid_yaml.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=ERROR msg="Failed to parse file content" err="yaml: line 4: did not find expected key" path=rules/bad.yaml lines=1-7 rules/bad.yaml:4 Fatal: did not find expected key (yaml/parse) 4 | diff --git a/cmd/pint/tests/0005_false_positive.txt b/cmd/pint/tests/0005_false_positive.txt index a094bc74..d9d53937 100644 --- a/cmd/pint/tests/0005_false_positive.txt +++ b/cmd/pint/tests/0005_false_positive.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] -- rules/0001.yml -- - record: "colo:test1" expr: topk(6, sum(rate(edgeworker_subrequest_errorCount{cordon="free"}[5m])) BY (zoneId,job)) diff --git a/cmd/pint/tests/0006_rr_labels.txt b/cmd/pint/tests/0006_rr_labels.txt index 4339b602..4ec1e513 100644 --- a/cmd/pint/tests/0006_rr_labels.txt +++ b/cmd/pint/tests/0006_rr_labels.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:8 Fatal: incomplete rule, no alert or record key (yaml/parse) 8 | - expr: sum(foo) diff --git a/cmd/pint/tests/0007_alerts.txt b/cmd/pint/tests/0007_alerts.txt index 5d12da6a..a1a1ef57 100644 --- 
a/cmd/pint/tests/0007_alerts.txt +++ b/cmd/pint/tests/0007_alerts.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:1-2 Bug: url annotation is required (alerts/annotation) 1 | - alert: Always 2 | expr: up diff --git a/cmd/pint/tests/0008_recording_rule_prometheus.txt b/cmd/pint/tests/0008_recording_rule_prometheus.txt index 2ba93e10..7b93f403 100644 --- a/cmd/pint/tests/0008_recording_rule_prometheus.txt +++ b/cmd/pint/tests/0008_recording_rule_prometheus.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:5 Bug: instance label should be removed when aggregating "^colo(?:_.+)?:.+$" rules, remove instance from by() (promql/aggregate) 5 | expr: sum by (instance) (http_inprogress_requests) diff --git a/cmd/pint/tests/0009_alerting_rule_prometheus.txt b/cmd/pint/tests/0009_alerting_rule_prometheus.txt index 5d676f16..adcaa8de 100644 --- a/cmd/pint/tests/0009_alerting_rule_prometheus.txt +++ b/cmd/pint/tests/0009_alerting_rule_prometheus.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:11-13 Bug: link annotation is required (alerts/annotation) 11 | annotations: 12 | summary: "Instance {{ $labels.instance }} down" diff --git a/cmd/pint/tests/0010_syntax_check.txt b/cmd/pint/tests/0010_syntax_check.txt index c3e41ff4..ffbd98f4 100644 --- a/cmd/pint/tests/0010_syntax_check.txt +++ b/cmd/pint/tests/0010_syntax_check.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=ERROR msg="Failed to parse file content" err="yaml: 
line 6: did not find expected '-' indicator" path=rules/1.yaml lines=1-12 rules/1.yaml:6 Fatal: did not find expected '-' indicator (yaml/parse) 6 | diff --git a/cmd/pint/tests/0011_ignore_rules.txt b/cmd/pint/tests/0011_ignore_rules.txt index 4b3ca603..c5c037e7 100644 --- a/cmd/pint/tests/0011_ignore_rules.txt +++ b/cmd/pint/tests/0011_ignore_rules.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/1.yaml:5 Fatal: syntax error: unexpected right parenthesis ')' (promql/syntax) 5 | expr: sum(errors_total) by ) diff --git a/cmd/pint/tests/0012_issue_20.txt b/cmd/pint/tests/0012_issue_20.txt index ece024b6..8b6444ce 100644 --- a/cmd/pint/tests/0012_issue_20.txt +++ b/cmd/pint/tests/0012_issue_20.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=WARN msg="Tried to read more lines than present in the source file, this is likely due to ' ' usage in some rules, see https://github.com/cloudflare/pint/issues/20 for details" path=rules/1.yaml rules/1.yaml:9-13 Warning: runbook_url annotation is required (alerts/annotation) diff --git a/cmd/pint/tests/0014_issue49_2.txt b/cmd/pint/tests/0014_issue49_2.txt index aff0d574..e348a578 100644 --- a/cmd/pint/tests/0014_issue49_2.txt +++ b/cmd/pint/tests/0014_issue49_2.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] -- rules/0001.yaml -- - record: down expr: up == 0 diff --git a/cmd/pint/tests/0018_match_alerting.txt b/cmd/pint/tests/0018_match_alerting.txt index 0dd2ead9..1622463e 100644 --- a/cmd/pint/tests/0018_match_alerting.txt +++ b/cmd/pint/tests/0018_match_alerting.txt @@ -4,7 +4,9 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO 
msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="File parsed" path=rules/0001.yml rules=2 +level=DEBUG msg="Generated all Prometheus servers" count=0 level=DEBUG msg="Found recording rule" path=rules/0001.yml record=colo:recording lines=1-2 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp"] path=rules/0001.yml rule=colo:recording level=DEBUG msg="Found alerting rule" path=rules/0001.yml alert=colo:alerting lines=4-5 diff --git a/cmd/pint/tests/0019_match_recording.txt b/cmd/pint/tests/0019_match_recording.txt index 7d40abdd..331cfe00 100644 --- a/cmd/pint/tests/0019_match_recording.txt +++ b/cmd/pint/tests/0019_match_recording.txt @@ -4,7 +4,9 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="File parsed" path=rules/0001.yml rules=2 +level=DEBUG msg="Generated all Prometheus servers" count=0 level=DEBUG msg="Found recording rule" path=rules/0001.yml record=colo:recording lines=1-2 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/aggregate(job:true)"] path=rules/0001.yml rule=colo:recording level=DEBUG msg="Found alerting rule" path=rules/0001.yml alert=colo:alerting lines=4-5 diff --git a/cmd/pint/tests/0020_ignore_kind.txt b/cmd/pint/tests/0020_ignore_kind.txt index bda02d11..8df6b71d 100644 --- a/cmd/pint/tests/0020_ignore_kind.txt +++ b/cmd/pint/tests/0020_ignore_kind.txt @@ -4,7 +4,9 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="File parsed" path=rules/0001.yml rules=2 +level=DEBUG msg="Generated all Prometheus 
servers" count=0 level=DEBUG msg="Found recording rule" path=rules/0001.yml record=colo:recording lines=4-5 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/aggregate(job:true)"] path=rules/0001.yml rule=colo:recording level=DEBUG msg="Found alerting rule" path=rules/0001.yml alert=colo:alerting lines=7-8 diff --git a/cmd/pint/tests/0022_ignore_multi.txt b/cmd/pint/tests/0022_ignore_multi.txt index a3ae5dc7..60885656 100644 --- a/cmd/pint/tests/0022_ignore_multi.txt +++ b/cmd/pint/tests/0022_ignore_multi.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/1.yaml:2 Warning: dropped label should be removed when aggregating "^.+$" rules, remove dropped from by() (promql/aggregate) 2 | expr: sum(errors_total) by(keep,dropped) diff --git a/cmd/pint/tests/0024_color_output.txt b/cmd/pint/tests/0024_color_output.txt index 7a1802b9..2307cec7 100644 --- a/cmd/pint/tests/0024_color_output.txt +++ b/cmd/pint/tests/0024_color_output.txt @@ -5,6 +5,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:2 Warning: job label is required and should be preserved when aggregating "^.+$" rules, remove job from without() (promql/aggregate) 2 | expr: sum(rate(fl_cf_html_bytes_in[10m])) WITHOUT (colo_id, instance, node_type, region, node_status, job, colo_name) diff --git a/cmd/pint/tests/0027_ci_branch.txt b/cmd/pint/tests/0027_ci_branch.txt index 72493099..497f00d8 100644 --- a/cmd/pint/tests/0027_ci_branch.txt +++ b/cmd/pint/tests/0027_ci_branch.txt @@ -21,6 +21,7 @@ cmp stderr ../stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check on current 
git branch using git blame" base=main level=INFO msg="Problems found" Fatal=1 rules.yml:2 Fatal: syntax error: unexpected identifier "bi" (promql/syntax) 2 | expr: sum(foo) bi(job) diff --git a/cmd/pint/tests/0028_ci_git_error.txt b/cmd/pint/tests/0028_ci_git_error.txt index 1d95709a..13e703a9 100644 --- a/cmd/pint/tests/0028_ci_git_error.txt +++ b/cmd/pint/tests/0028_ci_git_error.txt @@ -23,11 +23,9 @@ cmp stderr ../stderr.txt level=INFO msg="Loading configuration file" path=.pint.hcl level=DEBUG msg="Running git command" args=["rev-parse","--abbrev-ref","HEAD"] level=DEBUG msg="Got branch information" base=notmain current=v2 +level=INFO msg="Finding all rules to check on current git branch using git blame" base=notmain level=DEBUG msg="Running git command" args=["log","--format=%H","--no-abbrev-commit","--reverse","notmain..HEAD"] -level=ERROR msg="Fatal error" err="failed to get the list of commits to scan: fatal: ambiguous argument 'notmain..HEAD': unknown revision or path not in the working tree. -Use '--' to separate paths from revisions, like this: -'git [...] -- [...]' -" +level=ERROR msg="Fatal error" err="failed to get the list of commits to scan: fatal: ambiguous argument 'notmain..HEAD': unknown revision or path not in the working tree.\nUse '--' to separate paths from revisions, like this:\n'git [...] 
-- [...]'\n" -- src/v1.yml -- - record: rule1 expr: sum(foo) by(job) diff --git a/cmd/pint/tests/0029_ci_too_many_commits.txt b/cmd/pint/tests/0029_ci_too_many_commits.txt index 7e96954b..098f3a90 100644 --- a/cmd/pint/tests/0029_ci_too_many_commits.txt +++ b/cmd/pint/tests/0029_ci_too_many_commits.txt @@ -28,6 +28,7 @@ cmp stderr ../stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check on current git branch using git blame" base=main level=ERROR msg="Fatal error" err="number of commits to check (3) is higher than maxCommits (2), exiting" -- src/v1.yml -- - record: rule1 diff --git a/cmd/pint/tests/0037_disable_checks.txt b/cmd/pint/tests/0037_disable_checks.txt index 05608b3a..c9c5dc81 100644 --- a/cmd/pint/tests/0037_disable_checks.txt +++ b/cmd/pint/tests/0037_disable_checks.txt @@ -4,9 +4,12 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="File parsed" path=rules/0001.yml rules=3 -level=DEBUG msg="Starting query workers" name=prom uri=http://127.0.0.1 workers=16 +level=INFO msg="Configured new Prometheus server" name=prom uris=1 +level=DEBUG msg="Generated all Prometheus servers" count=1 level=DEBUG msg="Found alerting rule" path=rules/0001.yml alert=default-for lines=1-3 +level=DEBUG msg="Starting query workers" name=prom uri=http://127.0.0.1 workers=16 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/template","promql/fragile","promql/regexp","promql/vector_matching(prom)","rule/duplicate(prom)","labels/conflict(prom)"] path=rules/0001.yml rule=default-for level=DEBUG msg="Found recording rule" path=rules/0001.yml record=sum-job lines=5-6 level=DEBUG msg="Configured checks for rule" 
enabled=["promql/syntax","alerts/template","promql/fragile","promql/regexp","promql/vector_matching(prom)","rule/duplicate(prom)","labels/conflict(prom)","promql/aggregate(job:true)"] path=rules/0001.yml rule=sum-job diff --git a/cmd/pint/tests/0038_disable_checks_regex.txt b/cmd/pint/tests/0038_disable_checks_regex.txt index 8431eca8..a06ebf07 100644 --- a/cmd/pint/tests/0038_disable_checks_regex.txt +++ b/cmd/pint/tests/0038_disable_checks_regex.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:6 Warning: job label is required and should be preserved when aggregating "^.+$" rules, use by(job, ...) (promql/aggregate) 6 | expr: sum(foo) diff --git a/cmd/pint/tests/0039_prom_selected_path.txt b/cmd/pint/tests/0039_prom_selected_path.txt index ead00d7a..7d08dbd4 100644 --- a/cmd/pint/tests/0039_prom_selected_path.txt +++ b/cmd/pint/tests/0039_prom_selected_path.txt @@ -4,8 +4,10 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="File parsed" path=rules/0001.yml rules=3 -level=DEBUG msg="Starting query workers" name=disabled uri=http://127.0.0.1:123 workers=16 +level=INFO msg="Configured new Prometheus server" name=disabled uris=1 +level=DEBUG msg="Generated all Prometheus servers" count=1 level=DEBUG msg="Found alerting rule" path=rules/0001.yml alert=first lines=1-3 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp"] path=rules/0001.yml rule=first level=DEBUG msg="Found recording rule" path=rules/0001.yml record=second lines=5-6 @@ -16,7 +18,6 @@ rules/0001.yml:6 Warning: job label is required and should be preserved when agg 6 | expr: sum(bar) level=INFO msg="Problems found" Warning=1 -level=DEBUG 
msg="Stopping query workers" name=disabled uri=http://127.0.0.1:123 -- rules/0001.yml -- - alert: first expr: foo > 1 diff --git a/cmd/pint/tests/0040_rule_match_label.txt b/cmd/pint/tests/0040_rule_match_label.txt index 06f09d73..cf9b63f9 100644 --- a/cmd/pint/tests/0040_rule_match_label.txt +++ b/cmd/pint/tests/0040_rule_match_label.txt @@ -4,7 +4,9 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="File parsed" path=rules/rules.yml rules=4 +level=DEBUG msg="Generated all Prometheus servers" count=0 level=DEBUG msg="Found recording rule" path=rules/rules.yml record=ignore lines=1-2 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp"] path=rules/rules.yml rule=ignore level=DEBUG msg="Found recording rule" path=rules/rules.yml record=match lines=4-7 diff --git a/cmd/pint/tests/0042_watch_metrics.txt b/cmd/pint/tests/0042_watch_metrics.txt index f798146c..65713bf7 100644 --- a/cmd/pint/tests/0042_watch_metrics.txt +++ b/cmd/pint/tests/0042_watch_metrics.txt @@ -178,17 +178,3 @@ pint_rules_parsed_total{kind="recording"} # HELP pint_version Version information # TYPE pint_version gauge pint_version{version="unknown"} -# HELP prometheus_template_text_expansion_failures_total The total number of template text expansion failures. -# TYPE prometheus_template_text_expansion_failures_total counter -prometheus_template_text_expansion_failures_total -# HELP prometheus_template_text_expansions_total The total number of template text expansions. -# TYPE prometheus_template_text_expansions_total counter -prometheus_template_text_expansions_total -# HELP promhttp_metric_handler_requests_in_flight Current number of scrapes being served. 
-# TYPE promhttp_metric_handler_requests_in_flight gauge -promhttp_metric_handler_requests_in_flight -# HELP promhttp_metric_handler_requests_total Total number of scrapes by HTTP status code. -# TYPE promhttp_metric_handler_requests_total counter -promhttp_metric_handler_requests_total{code="200"} -promhttp_metric_handler_requests_total{code="500"} -promhttp_metric_handler_requests_total{code="503"} diff --git a/cmd/pint/tests/0052_match_multiple.txt b/cmd/pint/tests/0052_match_multiple.txt index 25236c33..f8f22579 100644 --- a/cmd/pint/tests/0052_match_multiple.txt +++ b/cmd/pint/tests/0052_match_multiple.txt @@ -4,7 +4,9 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="File parsed" path=rules/0001.yml rules=2 +level=DEBUG msg="Generated all Prometheus servers" count=0 level=DEBUG msg="Found recording rule" path=rules/0001.yml record=colo:recording lines=4-5 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/aggregate(job:true)"] path=rules/0001.yml rule=colo:recording level=DEBUG msg="Found alerting rule" path=rules/0001.yml alert=colo:alerting lines=7-8 diff --git a/cmd/pint/tests/0053_ignore_multiple.txt b/cmd/pint/tests/0053_ignore_multiple.txt index d2896516..9ac839fa 100644 --- a/cmd/pint/tests/0053_ignore_multiple.txt +++ b/cmd/pint/tests/0053_ignore_multiple.txt @@ -4,7 +4,9 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="File parsed" path=rules/0001.yml rules=2 +level=DEBUG msg="Generated all Prometheus servers" count=0 level=DEBUG msg="Found recording rule" path=rules/0001.yml record=colo:recording lines=4-5 level=DEBUG msg="Configured checks for rule" 
enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp"] path=rules/0001.yml rule=colo:recording level=DEBUG msg="Found alerting rule" path=rules/0001.yml alert=colo:alerting lines=7-8 diff --git a/cmd/pint/tests/0058_templated_check.txt b/cmd/pint/tests/0058_templated_check.txt index a293ab76..c46549f9 100644 --- a/cmd/pint/tests/0058_templated_check.txt +++ b/cmd/pint/tests/0058_templated_check.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:4-6 Bug: alert_for annotation is required (alerts/annotation) 4 | - alert: Instance Is Down 2 5 | expr: up == 0 diff --git a/cmd/pint/tests/0060_ci_noop.txt b/cmd/pint/tests/0060_ci_noop.txt index d234a274..774189ed 100644 --- a/cmd/pint/tests/0060_ci_noop.txt +++ b/cmd/pint/tests/0060_ci_noop.txt @@ -30,6 +30,7 @@ cmp stderr ../stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check on current git branch using git blame" base=main level=INFO msg="Problems found" Fatal=1 b.yml:2 Fatal: syntax error: unexpected identifier "bi" (promql/syntax) 2 | expr: sum(foo) bi() diff --git a/cmd/pint/tests/0063_lint_offline.txt b/cmd/pint/tests/0063_lint_offline.txt index ac0b4467..7da9aa41 100644 --- a/cmd/pint/tests/0063_lint_offline.txt +++ b/cmd/pint/tests/0063_lint_offline.txt @@ -4,6 +4,8 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] +level=INFO msg="Configured new Prometheus server" name=disabled uris=1 -- rules/ok.yml -- - record: sum:foo expr: sum(foo) diff --git a/cmd/pint/tests/0066_lint_owner.txt b/cmd/pint/tests/0066_lint_owner.txt index 196988be..dbbaeb10 100644 --- a/cmd/pint/tests/0066_lint_owner.txt +++ b/cmd/pint/tests/0066_lint_owner.txt 
@@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/1.yml:4-5 Bug: rule/owner comments are required in all files, please add a "# pint file/owner $owner" somewhere in this file and/or "# pint rule/owner $owner" on top of each rule (rule/owner) 4 | - alert: No Owner 5 | expr: up > 0 diff --git a/cmd/pint/tests/0067_relaxed.txt b/cmd/pint/tests/0067_relaxed.txt index d19311cb..536689cb 100644 --- a/cmd/pint/tests/0067_relaxed.txt +++ b/cmd/pint/tests/0067_relaxed.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/strict.yml:2 Fatal: cannot unmarshal !!seq into rulefmt.RuleGroups (yaml/parse) 2 | - alert: No Owner diff --git a/cmd/pint/tests/0073_lint_k8s.txt b/cmd/pint/tests/0073_lint_k8s.txt index b56980c8..06bb32f1 100644 --- a/cmd/pint/tests/0073_lint_k8s.txt +++ b/cmd/pint/tests/0073_lint_k8s.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/1.yml:22-23 Bug: summary annotation is required (alerts/annotation) 22 | - alert: Example_High_Restart_Rate 23 | expr: sum(rate(kube_pod_container_status_restarts_total{namespace="example-app"}[5m])) > ( 3/60 ) diff --git a/cmd/pint/tests/0074_strict_error.txt b/cmd/pint/tests/0074_strict_error.txt index 8cc334a0..f238963e 100644 --- a/cmd/pint/tests/0074_strict_error.txt +++ b/cmd/pint/tests/0074_strict_error.txt @@ -3,6 +3,7 @@ pint.error --no-color lint rules cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] rules/strict.yml:2 Fatal: field alert not found in type rulefmt.RuleGroup (yaml/parse) 2 | - alert: Conntrack_Table_Almost_Full diff --git 
a/cmd/pint/tests/0077_strict_error_owner.txt b/cmd/pint/tests/0077_strict_error_owner.txt index e06afcdd..b1caf27c 100644 --- a/cmd/pint/tests/0077_strict_error_owner.txt +++ b/cmd/pint/tests/0077_strict_error_owner.txt @@ -3,6 +3,7 @@ pint.error --no-color lint --require-owner rules cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] rules/strict.yml:4 Fatal: "foo bar": invalid field 'annotations' in recording rule (yaml/parse) 4 | - record: foo bar diff --git a/cmd/pint/tests/0078_repeated_group.txt b/cmd/pint/tests/0078_repeated_group.txt index 7d3f583c..73c22da4 100644 --- a/cmd/pint/tests/0078_repeated_group.txt +++ b/cmd/pint/tests/0078_repeated_group.txt @@ -3,6 +3,7 @@ pint.error --no-color lint --require-owner rules cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] rules/strict.yml:4 Fatal: groupname: "foo" is repeated in the same file (yaml/parse) 4 | - name: foo diff --git a/cmd/pint/tests/0080_lint_online.txt b/cmd/pint/tests/0080_lint_online.txt index fe34557e..f7b30334 100644 --- a/cmd/pint/tests/0080_lint_online.txt +++ b/cmd/pint/tests/0080_lint_online.txt @@ -11,6 +11,8 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] +level=INFO msg="Configured new Prometheus server" name=prom1 uris=1 level=WARN msg="No results for Prometheus uptime metric, you might have set uptime config option to a missing metric, please check your config" name=prom1 metric=prometheus_ready level=WARN msg="Using dummy Prometheus uptime metric results with no gaps" name=prom1 metric=prometheus_ready rules/1.yml:2 Warning: http_errors_total[2d] selector is trying to query Prometheus for 2d worth of metrics, but prometheus "prom1" at http://127.0.0.1:7080 is configured to only keep 1d of metrics history (promql/range_query) diff --git a/cmd/pint/tests/0081_rulefmt.txt 
b/cmd/pint/tests/0081_rulefmt.txt index 1843d557..5b2393fd 100644 --- a/cmd/pint/tests/0081_rulefmt.txt +++ b/cmd/pint/tests/0081_rulefmt.txt @@ -3,6 +3,7 @@ pint.error --no-color lint rules cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] rules/strict.yml:4 Fatal: missing expr key (yaml/parse) 4 | - record: foo diff --git a/cmd/pint/tests/0086_rulefmt_ignored_errors.txt b/cmd/pint/tests/0086_rulefmt_ignored_errors.txt index b3d33fed..a0674f06 100644 --- a/cmd/pint/tests/0086_rulefmt_ignored_errors.txt +++ b/cmd/pint/tests/0086_rulefmt_ignored_errors.txt @@ -3,6 +3,7 @@ pint.error --no-color lint rules cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] rules/strict.yml:4 Fatal: incomplete rule, no alert or record key (yaml/parse) 4 | - expr: MissingAlertOrRecord diff --git a/cmd/pint/tests/0087_dedup.txt b/cmd/pint/tests/0087_dedup.txt index d124d0dc..805dfbf1 100644 --- a/cmd/pint/tests/0087_dedup.txt +++ b/cmd/pint/tests/0087_dedup.txt @@ -3,6 +3,7 @@ pint.error --no-color lint rules cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] rules/01.yml:5 Warning: alert query doesn't have any condition, it will always fire if the metric exists (alerts/comparison) 5 | expr: sum(up{job="bar"}) / sum(foo) / sum(bar) diff --git a/cmd/pint/tests/0088_rule_link.txt b/cmd/pint/tests/0088_rule_link.txt index 3345c3d4..bea0aff5 100644 --- a/cmd/pint/tests/0088_rule_link.txt +++ b/cmd/pint/tests/0088_rule_link.txt @@ -11,6 +11,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/1.yml:10 Information: GET request for http://127.0.0.1:7088/404 returned invalid status code: 404 Not Found (rule/link) 10 | r404: http://127.0.0.1:7088/404 diff --git a/cmd/pint/tests/0089_lint_min_severity_bug.txt 
b/cmd/pint/tests/0089_lint_min_severity_bug.txt index 0799a403..c851e0b4 100644 --- a/cmd/pint/tests/0089_lint_min_severity_bug.txt +++ b/cmd/pint/tests/0089_lint_min_severity_bug.txt @@ -3,6 +3,7 @@ pint.ok --no-color lint --min-severity=bug rules cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] level=INFO msg="Problems found" Information=1 level=INFO msg="1 problem(s) not visible because of --min-severity=bug flag" -- rules/0001.yml -- diff --git a/cmd/pint/tests/0090_lint_min_severity_info.txt b/cmd/pint/tests/0090_lint_min_severity_info.txt index 2340066e..ce90e528 100644 --- a/cmd/pint/tests/0090_lint_min_severity_info.txt +++ b/cmd/pint/tests/0090_lint_min_severity_info.txt @@ -3,6 +3,7 @@ pint.ok --no-color lint --min-severity=info rules cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:5-7 Information: using the value of rate(errors[2m]) inside this annotation might be hard to read, consider using one of humanize template functions to make it more human friendly (alerts/template) 5 | expr: rate(errors[2m]) > 0 . 
diff --git a/cmd/pint/tests/0091_lint_min_severity_invalid.txt b/cmd/pint/tests/0091_lint_min_severity_invalid.txt index 693f27cf..1f06d8e6 100644 --- a/cmd/pint/tests/0091_lint_min_severity_invalid.txt +++ b/cmd/pint/tests/0091_lint_min_severity_invalid.txt @@ -3,6 +3,7 @@ pint.error --no-color lint --min-severity=xxx rules cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] level=ERROR msg="Fatal error" err="invalid --min-severity value: unknown severity: xxx" -- rules/0001.yml -- groups: diff --git a/cmd/pint/tests/0092_dir_symlink.txt b/cmd/pint/tests/0092_dir_symlink.txt index fab7aa8d..16221d20 100644 --- a/cmd/pint/tests/0092_dir_symlink.txt +++ b/cmd/pint/tests/0092_dir_symlink.txt @@ -8,7 +8,9 @@ pint.ok -l debug --no-color lint rules linked rules/src/rule.yaml cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules","linked","rules/src/rule.yaml"] level=DEBUG msg="File parsed" path=rules/src/rule.yaml rules=1 +level=DEBUG msg="Generated all Prometheus servers" count=0 level=DEBUG msg="Found recording rule" path=rules/src/rule.yaml record=down lines=4-5 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp"] path=rules/src/rule.yaml rule=down -- rules/src/rule.yaml -- diff --git a/cmd/pint/tests/0095_rulefmt_symlink.txt b/cmd/pint/tests/0095_rulefmt_symlink.txt index 5db38c0f..f9e88557 100644 --- a/cmd/pint/tests/0095_rulefmt_symlink.txt +++ b/cmd/pint/tests/0095_rulefmt_symlink.txt @@ -7,8 +7,10 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="File parsed" path=rules/relaxed/1.yml rules=1 level=DEBUG msg="File parsed" path=rules/strict/symlink.yml rules=1 +level=DEBUG msg="Generated all Prometheus servers" count=0 level=DEBUG msg="Found 
recording rule" path=rules/relaxed/1.yml record=foo lines=1-2 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp"] path=rules/relaxed/1.yml rule=foo level=DEBUG msg="Found recording rule" path=rules/strict/symlink.yml record=foo lines=1-2 diff --git a/cmd/pint/tests/0096_bad_symlink.txt b/cmd/pint/tests/0096_bad_symlink.txt index d8e4261b..1a153108 100644 --- a/cmd/pint/tests/0096_bad_symlink.txt +++ b/cmd/pint/tests/0096_bad_symlink.txt @@ -6,4 +6,5 @@ pint.error -l debug --no-color lint rules cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] level=ERROR msg="Fatal error" err="lstat rules/../bad.yml: no such file or directory" diff --git a/cmd/pint/tests/0099_symlink_outside_glob.txt b/cmd/pint/tests/0099_symlink_outside_glob.txt index 126f885d..8957806f 100644 --- a/cmd/pint/tests/0099_symlink_outside_glob.txt +++ b/cmd/pint/tests/0099_symlink_outside_glob.txt @@ -7,7 +7,9 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules/relaxed"] level=DEBUG msg="File parsed" path=rules/relaxed/1.yml rules=1 +level=DEBUG msg="Generated all Prometheus servers" count=0 level=DEBUG msg="Found recording rule" path=rules/relaxed/1.yml record=foo lines=1-2 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp"] path=rules/relaxed/1.yml rule=foo -- rules/relaxed/1.yml -- diff --git a/cmd/pint/tests/0103_file_disable.txt b/cmd/pint/tests/0103_file_disable.txt index ec90f72b..054f9e39 100644 --- a/cmd/pint/tests/0103_file_disable.txt +++ b/cmd/pint/tests/0103_file_disable.txt @@ -4,9 +4,12 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" 
paths=["rules"] level=DEBUG msg="File parsed" path=rules/0001.yml rules=1 -level=DEBUG msg="Starting query workers" name=prom uri=http://127.0.0.1:7103 workers=16 +level=INFO msg="Configured new Prometheus server" name=prom uris=1 +level=DEBUG msg="Generated all Prometheus servers" count=1 level=DEBUG msg="Found recording rule" path=rules/0001.yml record=colo:test1 lines=9-10 +level=DEBUG msg="Starting query workers" name=prom uri=http://127.0.0.1:7103 workers=16 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/vector_matching(prom)","labels/conflict(prom)"] path=rules/0001.yml rule=colo:test1 level=DEBUG msg="Stopping query workers" name=prom uri=http://127.0.0.1:7103 -- rules/0001.yml -- diff --git a/cmd/pint/tests/0108_rule_duplicate.txt b/cmd/pint/tests/0108_rule_duplicate.txt index 9a365ec8..ee80b884 100644 --- a/cmd/pint/tests/0108_rule_duplicate.txt +++ b/cmd/pint/tests/0108_rule_duplicate.txt @@ -4,6 +4,8 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] +level=INFO msg="Configured new Prometheus server" name=prom uris=1 level=ERROR msg="Query returned an error" err="failed to query Prometheus config: Get \"http://127.0.0.1:7108/api/v1/status/config\": dial tcp 127.0.0.1:7108: connect: connection refused" uri=http://127.0.0.1:7108 query=/api/v1/status/config level=ERROR msg="Query returned an error" err="failed to query Prometheus config: Get \"http://127.0.0.1:7108/api/v1/status/config\": dial tcp 127.0.0.1:7108: connect: connection refused" uri=http://127.0.0.1:7108 query=/api/v1/status/config level=ERROR msg="Query returned an error" err="failed to query Prometheus config: Get \"http://127.0.0.1:7108/api/v1/status/config\": dial tcp 127.0.0.1:7108: connect: connection refused" uri=http://127.0.0.1:7108 query=/api/v1/status/config 
diff --git a/cmd/pint/tests/0109_rule_duplicate_multiple_proms_include.txt b/cmd/pint/tests/0109_rule_duplicate_multiple_proms_include.txt index e6fa66c4..caf2c170 100644 --- a/cmd/pint/tests/0109_rule_duplicate_multiple_proms_include.txt +++ b/cmd/pint/tests/0109_rule_duplicate_multiple_proms_include.txt @@ -4,6 +4,9 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] +level=INFO msg="Configured new Prometheus server" name=prom1 uris=1 +level=INFO msg="Configured new Prometheus server" name=prom2 uris=1 -- rules/0001.yml -- - record: "colo:duplicate" expr: sum(foo) without(job) diff --git a/cmd/pint/tests/0110_rule_duplicate_multiple_proms_exclude.txt b/cmd/pint/tests/0110_rule_duplicate_multiple_proms_exclude.txt index 31f3bc52..ae34921a 100644 --- a/cmd/pint/tests/0110_rule_duplicate_multiple_proms_exclude.txt +++ b/cmd/pint/tests/0110_rule_duplicate_multiple_proms_exclude.txt @@ -4,6 +4,9 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] +level=INFO msg="Configured new Prometheus server" name=prom1 uris=1 +level=INFO msg="Configured new Prometheus server" name=prom2 uris=1 -- rules/0001.yml -- - record: "colo:duplicate" expr: sum(foo) without(job) diff --git a/cmd/pint/tests/0111_snooze.txt b/cmd/pint/tests/0111_snooze.txt index 910935f2..17af2040 100644 --- a/cmd/pint/tests/0111_snooze.txt +++ b/cmd/pint/tests/0111_snooze.txt @@ -4,7 +4,9 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="File parsed" path=rules/0001.yml rules=1 +level=DEBUG msg="Generated all Prometheus servers" count=0 level=DEBUG msg="Found recording rule" path=rules/0001.yml record=sum-job lines=2-3 level=DEBUG msg="Check snoozed by comment" 
check=promql/aggregate(job:true) comment="snooze 2099-11-28T10:24:18Z promql/aggregate" until="2099-11-28T10:24:18Z" snooze=promql/aggregate level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp"] path=rules/0001.yml rule=sum-job diff --git a/cmd/pint/tests/0112_expired_snooze.txt b/cmd/pint/tests/0112_expired_snooze.txt index 601858f2..727fcb91 100644 --- a/cmd/pint/tests/0112_expired_snooze.txt +++ b/cmd/pint/tests/0112_expired_snooze.txt @@ -4,7 +4,9 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="File parsed" path=rules/0001.yml rules=1 +level=DEBUG msg="Generated all Prometheus servers" count=0 level=DEBUG msg="Found recording rule" path=rules/0001.yml record=sum-job lines=2-3 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp","promql/aggregate(job:true)"] path=rules/0001.yml rule=sum-job rules/0001.yml:3 Bug: job label is required and should be preserved when aggregating "^.+$" rules, use by(job, ...) 
(promql/aggregate) diff --git a/cmd/pint/tests/0115_file_disable_tag.txt b/cmd/pint/tests/0115_file_disable_tag.txt index 233ab245..f7418fd6 100644 --- a/cmd/pint/tests/0115_file_disable_tag.txt +++ b/cmd/pint/tests/0115_file_disable_tag.txt @@ -4,9 +4,12 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="File parsed" path=rules/0001.yml rules=1 -level=DEBUG msg="Starting query workers" name=prom uri=http://127.0.0.1:7103 workers=16 +level=INFO msg="Configured new Prometheus server" name=prom uris=1 +level=DEBUG msg="Generated all Prometheus servers" count=1 level=DEBUG msg="Found recording rule" path=rules/0001.yml record=colo:test1 lines="6 8" +level=DEBUG msg="Starting query workers" name=prom uri=http://127.0.0.1:7103 workers=16 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/for","alerts/comparison","alerts/template","promql/fragile","promql/regexp"] path=rules/0001.yml rule=colo:test1 level=DEBUG msg="Stopping query workers" name=prom uri=http://127.0.0.1:7103 -- rules/0001.yml -- diff --git a/cmd/pint/tests/0116_file_snooze.txt b/cmd/pint/tests/0116_file_snooze.txt index 6f0a5af3..b643b322 100644 --- a/cmd/pint/tests/0116_file_snooze.txt +++ b/cmd/pint/tests/0116_file_snooze.txt @@ -4,9 +4,11 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] level=DEBUG msg="Check snoozed by comment" check=promql/aggregate(job:true) comment="file/snooze 2099-11-28T10:24:18Z promql/aggregate(job:true)" until="2099-11-28T10:24:18Z" snooze=promql/aggregate(job:true) level=DEBUG msg="Check snoozed by comment" check=alerts/for comment="file/snooze 2099-11-28T10:24:18Z alerts/for" until="2099-11-28T10:24:18Z" snooze=alerts/for level=DEBUG msg="File parsed" path=rules/0001.yml rules=2 +level=DEBUG msg="Generated all 
Prometheus servers" count=0 level=DEBUG msg="Found recording rule" path=rules/0001.yml record=sum-job lines=4-5 level=DEBUG msg="Configured checks for rule" enabled=["promql/syntax","alerts/comparison","alerts/template","promql/fragile","promql/regexp"] path=rules/0001.yml rule=sum-job level=DEBUG msg="Found alerting rule" path=rules/0001.yml alert=Down lines=7-9 diff --git a/cmd/pint/tests/0121_rule_for.txt b/cmd/pint/tests/0121_rule_for.txt index 9172cdde..097a913f 100644 --- a/cmd/pint/tests/0121_rule_for.txt +++ b/cmd/pint/tests/0121_rule_for.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:6 Bug: this alert rule must have a 'for' field with a minimum duration of 5m (rule/for) 6 | for: 3m diff --git a/cmd/pint/tests/0122_lint_owner_allowed.txt b/cmd/pint/tests/0122_lint_owner_allowed.txt index 9c02619d..3c9312db 100644 --- a/cmd/pint/tests/0122_lint_owner_allowed.txt +++ b/cmd/pint/tests/0122_lint_owner_allowed.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/1.yml:4-5 Bug: rule/owner comments are required in all files, please add a "# pint file/owner $owner" somewhere in this file and/or "# pint rule/owner $owner" on top of each rule (rule/owner) 4 | - alert: No Owner 5 | expr: up > 0 diff --git a/cmd/pint/tests/0124_ci_base_branch_flag.txt b/cmd/pint/tests/0124_ci_base_branch_flag.txt index a0dae325..53b1a904 100644 --- a/cmd/pint/tests/0124_ci_base_branch_flag.txt +++ b/cmd/pint/tests/0124_ci_base_branch_flag.txt @@ -21,6 +21,7 @@ cmp stderr ../stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check on current git branch using git blame" base=main level=INFO msg="Problems found" Fatal=1 rules.yml:2 Fatal: 
syntax error: unexpected identifier "bi" (promql/syntax) 2 | expr: sum(foo) bi(job) diff --git a/cmd/pint/tests/0125_lint_fail_on_warning.txt b/cmd/pint/tests/0125_lint_fail_on_warning.txt index 8fdbc6fe..96ca2bc8 100644 --- a/cmd/pint/tests/0125_lint_fail_on_warning.txt +++ b/cmd/pint/tests/0125_lint_fail_on_warning.txt @@ -10,6 +10,7 @@ groups: expr: up{job=~"xxx"} -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:5 Warning: alert query doesn't have any condition, it will always fire if the metric exists (alerts/comparison) 5 | expr: up{job=~"xxx"} diff --git a/cmd/pint/tests/0126_lint_fail_on_invalid.txt b/cmd/pint/tests/0126_lint_fail_on_invalid.txt index 6716e1e8..dfcc7b6e 100644 --- a/cmd/pint/tests/0126_lint_fail_on_invalid.txt +++ b/cmd/pint/tests/0126_lint_fail_on_invalid.txt @@ -6,4 +6,5 @@ cmp stderr stderr.txt # empty -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] level=ERROR msg="Fatal error" err="invalid --fail-on value: unknown severity: xxx" diff --git a/cmd/pint/tests/0127_lint_fail_on_fatal_but_got_warning.txt b/cmd/pint/tests/0127_lint_fail_on_fatal_but_got_warning.txt index d4ace045..cd4942a4 100644 --- a/cmd/pint/tests/0127_lint_fail_on_fatal_but_got_warning.txt +++ b/cmd/pint/tests/0127_lint_fail_on_fatal_but_got_warning.txt @@ -10,5 +10,6 @@ groups: expr: up{job="xxx"} -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] level=INFO msg="Problems found" Warning=1 level=INFO msg="1 problem(s) not visible because of --min-severity=bug flag" diff --git a/cmd/pint/tests/0128_lint_fail_on_warning_only.txt b/cmd/pint/tests/0128_lint_fail_on_warning_only.txt index faa5f74c..1b7c1346 100644 --- a/cmd/pint/tests/0128_lint_fail_on_warning_only.txt +++ b/cmd/pint/tests/0128_lint_fail_on_warning_only.txt @@ -10,6 +10,7 @@ groups: expr: up{job="xxx"} -- stderr.txt -- +level=INFO msg="Finding all rules to check" paths=["rules"] level=INFO msg="Problems found" 
Warning=1 level=INFO msg="1 problem(s) not visible because of --min-severity=bug flag" level=ERROR msg="Fatal error" err="found 1 problem(s) with severity Warning or higher" diff --git a/cmd/pint/tests/0134_ci_base_branch_flag_path.txt b/cmd/pint/tests/0134_ci_base_branch_flag_path.txt index d06ed840..fa4aee51 100644 --- a/cmd/pint/tests/0134_ci_base_branch_flag_path.txt +++ b/cmd/pint/tests/0134_ci_base_branch_flag_path.txt @@ -23,11 +23,9 @@ cmp stderr ../stderr.txt level=INFO msg="Loading configuration file" path=.pint.hcl level=DEBUG msg="Running git command" args=["rev-parse","--abbrev-ref","HEAD"] level=DEBUG msg="Got branch information" base=origin/main current=v2 +level=INFO msg="Finding all rules to check on current git branch using git blame" base=origin/main level=DEBUG msg="Running git command" args=["log","--format=%H","--no-abbrev-commit","--reverse","origin/main..HEAD"] -level=ERROR msg="Fatal error" err="failed to get the list of commits to scan: fatal: ambiguous argument 'origin/main..HEAD': unknown revision or path not in the working tree. -Use '--' to separate paths from revisions, like this: -'git <command> [<revision>...] -- [<file>...]' -" +level=ERROR msg="Fatal error" err="failed to get the list of commits to scan: fatal: ambiguous argument 'origin/main..HEAD': unknown revision or path not in the working tree.\nUse '--' to separate paths from revisions, like this:\n'git <command> [<revision>...] 
-- [<file>...]'\n" -- src/v1.yml -- - record: rule1 expr: sum(foo) by(job) diff --git a/cmd/pint/tests/0135_ci_base_branch_config_path.txt b/cmd/pint/tests/0135_ci_base_branch_config_path.txt index 9ca6bf27..68eed447 100644 --- a/cmd/pint/tests/0135_ci_base_branch_config_path.txt +++ b/cmd/pint/tests/0135_ci_base_branch_config_path.txt @@ -23,11 +23,9 @@ cmp stderr ../stderr.txt level=INFO msg="Loading configuration file" path=.pint.hcl level=DEBUG msg="Running git command" args=["rev-parse","--abbrev-ref","HEAD"] level=DEBUG msg="Got branch information" base=origin/main current=v2 +level=INFO msg="Finding all rules to check on current git branch using git blame" base=origin/main level=DEBUG msg="Running git command" args=["log","--format=%H","--no-abbrev-commit","--reverse","origin/main..HEAD"] -level=ERROR msg="Fatal error" err="failed to get the list of commits to scan: fatal: ambiguous argument 'origin/main..HEAD': unknown revision or path not in the working tree. -Use '--' to separate paths from revisions, like this: -'git <command> [<revision>...] -- [<file>...]' -" +level=ERROR msg="Fatal error" err="failed to get the list of commits to scan: fatal: ambiguous argument 'origin/main..HEAD': unknown revision or path not in the working tree.\nUse '--' to separate paths from revisions, like this:\n'git <command> [<revision>...] 
-- [<file>...]'\n" -- src/v1.yml -- - record: rule1 expr: sum(foo) by(job) diff --git a/cmd/pint/tests/0136_annotation_regex_key.txt b/cmd/pint/tests/0136_annotation_regex_key.txt index d02c71e8..ad98d246 100644 --- a/cmd/pint/tests/0136_annotation_regex_key.txt +++ b/cmd/pint/tests/0136_annotation_regex_key.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] -- rules/0001.yml -- - alert: Instance Is Down 1 expr: up == 0 diff --git a/cmd/pint/tests/0137_annotation_regex_key_fail.txt b/cmd/pint/tests/0137_annotation_regex_key_fail.txt index 42ef2c45..58c09199 100644 --- a/cmd/pint/tests/0137_annotation_regex_key_fail.txt +++ b/cmd/pint/tests/0137_annotation_regex_key_fail.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:4 Bug: annotation_.* annotation value must match "^bar$" (alerts/annotation) 4 | annotation_foo: foo diff --git a/cmd/pint/tests/0138_annoation_regex_key_required.txt b/cmd/pint/tests/0138_annoation_regex_key_required.txt index 69f89251..5aa4060d 100644 --- a/cmd/pint/tests/0138_annoation_regex_key_required.txt +++ b/cmd/pint/tests/0138_annoation_regex_key_required.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:1-2 Bug: annotation_.* annotation is required (alerts/annotation) 1 | - alert: Instance Is Down 1 2 | expr: up == 0 diff --git a/cmd/pint/tests/0141_empty_keys.txt b/cmd/pint/tests/0141_empty_keys.txt index 20d096c1..4efff815 100644 --- a/cmd/pint/tests/0141_empty_keys.txt +++ b/cmd/pint/tests/0141_empty_keys.txt @@ -3,6 +3,7 @@ pint.error --no-color lint rules.yml cmp stderr stderr.txt -- stderr.txt -- +level=INFO msg="Finding all rules 
to check" paths=["rules.yml"] rules.yml:4 Fatal: record value cannot be empty (yaml/parse) 4 | - record: diff --git a/cmd/pint/tests/0142_keep_firing_for.txt b/cmd/pint/tests/0142_keep_firing_for.txt index c1592223..e6b4bd8b 100644 --- a/cmd/pint/tests/0142_keep_firing_for.txt +++ b/cmd/pint/tests/0142_keep_firing_for.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] -- rules/0001.yml -- - alert: Instance Is Down 1 expr: up == 0 diff --git a/cmd/pint/tests/0143_keep_firing_for.txt b/cmd/pint/tests/0143_keep_firing_for.txt index 6f6c487c..be40edf4 100644 --- a/cmd/pint/tests/0143_keep_firing_for.txt +++ b/cmd/pint/tests/0143_keep_firing_for.txt @@ -4,6 +4,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=INFO msg="Loading configuration file" path=.pint.hcl +level=INFO msg="Finding all rules to check" paths=["rules"] rules/0001.yml:6 Bug: this alert rule must have a 'keep_firing_for' field with a minimum duration of 5m (rule/for) 6 | keep_firing_for: 3m diff --git a/cmd/pint/watch.go b/cmd/pint/watch.go index 3d8f64ab..89f27693 100644 --- a/cmd/pint/watch.go +++ b/cmd/pint/watch.go @@ -22,6 +22,7 @@ import ( "github.com/cloudflare/pint/internal/reporter" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/promhttp" dto "github.com/prometheus/client_model/go" "github.com/urfave/cli/v2" @@ -108,16 +109,21 @@ func actionWatch(c *cli.Context) error { // start HTTP server for metrics collector := newProblemCollector(meta.cfg, paths, minSeverity, c.Int(maxProblemsFlag)) // register all metrics - prometheus.MustRegister(collector) - prometheus.MustRegister(checkDuration) - prometheus.MustRegister(checkIterationsTotal) - prometheus.MustRegister(checkIterationChecks) - prometheus.MustRegister(checkIterationChecksDone) - 
prometheus.MustRegister(pintVersion) - prometheus.MustRegister(lastRunTime) - prometheus.MustRegister(lastRunDuration) - prometheus.MustRegister(rulesParsedTotal) - promapi.RegisterMetrics() + metricsRegistry.MustRegister(collector) + metricsRegistry.MustRegister(checkDuration) + metricsRegistry.MustRegister(checkIterationsTotal) + metricsRegistry.MustRegister(checkIterationChecks) + metricsRegistry.MustRegister(checkIterationChecksDone) + metricsRegistry.MustRegister(pintVersion) + metricsRegistry.MustRegister(lastRunTime) + metricsRegistry.MustRegister(lastRunDuration) + metricsRegistry.MustRegister(rulesParsedTotal) + promapi.RegisterMetrics(metricsRegistry) + + metricsRegistry.MustRegister( + collectors.NewGoCollector(), + collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), + ) // init metrics if needed pintVersion.WithLabelValues(version).Set(1) @@ -125,7 +131,10 @@ func actionWatch(c *cli.Context) error { rulesParsedTotal.WithLabelValues(config.RecordingRuleType).Add(0) rulesParsedTotal.WithLabelValues(config.InvalidRuleType).Add(0) - http.Handle("/metrics", promhttp.Handler()) + http.Handle("/metrics", promhttp.HandlerFor(metricsRegistry, promhttp.HandlerOpts{ + ErrorLog: slog.NewLogLogger(slog.Default().Handler(), slog.LevelError), + Timeout: time.Second * 20, + })) listen := c.String(listenFlag) server := http.Server{ Addr: listen, @@ -141,14 +150,12 @@ func actionWatch(c *cli.Context) error { interval := c.Duration(intervalFlag) - for _, prom := range meta.cfg.PrometheusServers { - prom.StartWorkers() - } + gen := config.NewPrometheusGenerator(meta.cfg, metricsRegistry) // start timer to run every $interval ack := make(chan bool, 1) mainCtx, mainCancel := context.WithCancel(context.WithValue(context.Background(), config.CommandKey, config.WatchCommand)) - stop := startTimer(mainCtx, meta.cfg, meta.workers, interval, ack, collector) + stop := startTimer(mainCtx, meta.workers, gen, interval, ack, collector) quit := make(chan os.Signal, 1) 
signal.Notify(quit, os.Interrupt, syscall.SIGINT, syscall.SIGTERM) @@ -161,9 +168,7 @@ func actionWatch(c *cli.Context) error { slog.Info("Waiting for all background tasks to finish") <-ack - for _, prom := range meta.cfg.PrometheusServers { - prom.Close() - } + gen.Stop() ctx, cancel := context.WithTimeout(context.Background(), time.Minute) defer cancel() @@ -174,7 +179,7 @@ func actionWatch(c *cli.Context) error { return nil } -func startTimer(ctx context.Context, _ config.Config, workers int, interval time.Duration, ack chan bool, collector *problemCollector) chan bool { +func startTimer(ctx context.Context, workers int, gen *config.PrometheusGenerator, interval time.Duration, ack chan bool, collector *problemCollector) chan bool { ticker := time.NewTicker(time.Second) stop := make(chan bool, 1) wasBootstrapped := false @@ -188,7 +193,7 @@ func startTimer(ctx context.Context, _ config.Config, workers int, interval time ticker.Reset(interval) wasBootstrapped = true } - if err := collector.scan(ctx, workers); err != nil { + if err := collector.scan(ctx, workers, gen); err != nil { slog.Error("Got an error when running checks", slog.Any("err", err)) } checkIterationsTotal.Inc() @@ -246,7 +251,7 @@ func newProblemCollector(cfg config.Config, paths []string, minSeverity checks.S } } -func (c *problemCollector) scan(ctx context.Context, workers int) error { +func (c *problemCollector) scan(ctx context.Context, workers int, gen *config.PrometheusGenerator) error { finder := discovery.NewGlobFinder(c.paths, c.cfg.Parser.CompileRelaxed()) // nolint: contextcheck entries, err := finder.Find() @@ -254,7 +259,12 @@ func (c *problemCollector) scan(ctx context.Context, workers int) error { return err } - s := checkRules(ctx, workers, c.cfg, entries) + if err = gen.Discover(ctx); err != nil { + return err + } + slog.Debug("Generated all Prometheus servers", slog.Int("count", gen.Count())) + + s := checkRules(ctx, workers, gen, c.cfg, entries) c.lock.Lock() defer 
c.lock.Unlock() diff --git a/docs/examples/discovery.hcl b/docs/examples/discovery.hcl new file mode 100644 index 00000000..d81e907c --- /dev/null +++ b/docs/examples/discovery.hcl @@ -0,0 +1,39 @@ +# Example with Prometheus server discovery. + +discovery { + + # filepath discovery will generate Prometheus servers from files on disk. + # We define a regexp and any file or directory path matching that regexp will + # generate a new Prometheus server. + filepath { + # Directory to scan for files. + directory = "/etc/prometheus/servers" + + # Regexp rule to match, with capture groups to store variables. + match = "(?P<name>\\w+).yaml" + + # Use variables from the regex to generate a new Prometheus configuration block. + template { + name = "prometheus-$name" # We can use 'name' regexp capture group as $name. + uri = "https://prometheus-$name.example.com" + failover = [ "https://prometheus-$name-backup.example.com" ] + headers = { + "X-Auth": "secret", + "X-User": "bob" + "X-Cluster": "$${name}" # HCL will error if we use ${name} as $ must be escaped using $$. + } + timeout = "30s" + } + + template { + name = "prometheus-clone-${1}" # We can also reference regexp capture groups using index numbers ($1 is the first capture group). + uri = "https://${1}.example.com" # ${1} doesn't have to be escaped.
+ failover = [ "https://${1}-backup.example.com" ] + headers = { + "X-Auth": "secret", + "X-User": "bob" + } + timeout = "30s" + } + } +} diff --git a/internal/checks/base_test.go b/internal/checks/base_test.go index c7e5dba3..62401198 100644 --- a/internal/checks/base_test.go +++ b/internal/checks/base_test.go @@ -15,6 +15,7 @@ import ( "time" v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -127,8 +128,9 @@ func runTests(t *testing.T, testCases []checkTest) { prom := tc.prometheus(uri) if prom != nil { - prom.StartWorkers() - defer prom.Close() + reg := prometheus.NewRegistry() + prom.StartWorkers(reg) + defer prom.Close(reg) } entries, err := parseContent(tc.content) diff --git a/internal/config/__snapshots__/config_test.snap b/internal/config/__snapshots__/config_test.snap index 935e2425..dcda60e3 100755 --- a/internal/config/__snapshots__/config_test.snap +++ b/internal/config/__snapshots__/config_test.snap @@ -34,7 +34,7 @@ } --- -[TestGetChecksForRule/single_prometheus_server_/_path_mismatch - 1] +[TestGetChecksForRule/single_prometheus_server - 1] { "ci": { "maxCommits": 20, @@ -49,9 +49,6 @@ "concurrency": 16, "rateLimit": 100, "uptime": "up", - "include": [ - "foo.yml" - ], "required": false } ], @@ -83,7 +80,7 @@ } --- -[TestGetChecksForRule/single_prometheus_server_/_include_&_exclude - 1] +[TestGetChecksForRule/multiple_URIs - 1] { "ci": { "maxCommits": 20, @@ -94,16 +91,14 @@ { "name": "prom", "uri": "http://localhost", + "failover": [ + "http://localhost/1", + "http://localhost/2" + ], "timeout": "1s", "concurrency": 16, "rateLimit": 100, "uptime": "up", - "include": [ - ".*" - ], - "exclude": [ - "rules.yml" - ], "required": false } ], @@ -135,7 +130,7 @@ } --- -[TestGetChecksForRule/single_prometheus_server_/_excluded - 1] +[TestGetChecksForRule/two_prometheus_servers_/_disable_all_checks_via_comment - 1] { 
"ci": { "maxCommits": 20, @@ -144,15 +139,21 @@ "parser": {}, "prometheus": [ { - "name": "prom", - "uri": "http://localhost", + "name": "prom1", + "uri": "http://localhost/1", + "timeout": "1s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "required": false + }, + { + "name": "prom2", + "uri": "http://localhost/2", "timeout": "1s", "concurrency": 16, "rateLimit": 100, "uptime": "up", - "exclude": [ - "rules.yml" - ], "required": false } ], @@ -178,19 +179,36 @@ "rule/label", "rule/link", "rule/reject" + ], + "disabled": [ + "alerts/template" ] }, "owners": {} } --- -[TestGetChecksForRule/single_empty_rule - 1] +[TestGetChecksForRule/single_prometheus_server_/_path_mismatch - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, + "prometheus": [ + { + "name": "prom", + "uri": "http://localhost", + "timeout": "1s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "include": [ + "foo.yml" + ], + "required": false + } + ], "checks": { "enabled": [ "alerts/annotation", @@ -215,20 +233,34 @@ "rule/reject" ] }, - "rules": [ - {} - ], "owners": {} } --- -[TestGetChecksForRule/rule_with_aggregate_checks - 1] +[TestGetChecksForRule/single_prometheus_server_/_include_&_exclude - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, + "prometheus": [ + { + "name": "prom", + "uri": "http://localhost", + "timeout": "1s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "include": [ + ".*" + ], + "exclude": [ + "rules.yml" + ], + "required": false + } + ], "checks": { "enabled": [ "alerts/annotation", @@ -253,38 +285,31 @@ "rule/reject" ] }, - "rules": [ - { - "aggregate": [ - { - "name": ".+", - "keep": [ - "job" - ], - "severity": "bug" - }, - { - "name": ".+", - "strip": [ - "instance", - "rack" - ], - "severity": "bug" - } - ] - } - ], "owners": {} } --- -[TestGetChecksForRule/multiple_checks_and_disable_comment - 1] +[TestGetChecksForRule/single_prometheus_server_/_excluded - 1] { "ci": { 
"maxCommits": 20, "baseBranch": "master" }, "parser": {}, + "prometheus": [ + { + "name": "prom", + "uri": "http://localhost", + "timeout": "1s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "exclude": [ + "rules.yml" + ], + "required": false + } + ], "checks": { "enabled": [ "alerts/annotation", @@ -309,38 +334,31 @@ "rule/reject" ] }, - "rules": [ - { - "aggregate": [ - { - "name": ".+", - "keep": [ - "job" - ], - "severity": "bug" - }, - { - "name": ".+", - "strip": [ - "instance", - "rack" - ], - "severity": "bug" - } - ] - } - ], "owners": {} } --- -[TestGetChecksForRule/prometheus_check_without_prometheus_server - 1] +[TestGetChecksForRule/single_prometheus_server_/_path_match - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, + "prometheus": [ + { + "name": "prom", + "uri": "http://localhost", + "timeout": "1s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "include": [ + "rules.yml" + ], + "required": false + } + ], "checks": { "enabled": [ "alerts/annotation", @@ -365,25 +383,43 @@ "rule/reject" ] }, - "rules": [ - { - "cost": { - "maxSeries": 10000, - "severity": "warning" - } - } - ], "owners": {} } --- -[TestGetChecksForRule/duplicated_rules - 1] +[TestGetChecksForRule/multiple_prometheus_servers - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, + "prometheus": [ + { + "name": "prom", + "uri": "http://localhost", + "timeout": "1s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "include": [ + "rules.yml" + ], + "required": false + }, + { + "name": "ignore", + "uri": "http://localhost", + "timeout": "1s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "include": [ + "foo.+" + ], + "required": false + } + ], "checks": { "enabled": [ "alerts/annotation", @@ -408,56 +444,11 @@ "rule/reject" ] }, - "rules": [ - { - "label": [ - { - "key": "team", - "required": true, - "severity": "bug" - } - ] - }, - { - "annotation": [ - { - "key": "summary", - 
"required": true, - "severity": "bug" - } - ] - }, - { - "annotation": [ - { - "key": "summary", - "required": true, - "severity": "bug" - } - ], - "label": [ - { - "key": "team", - "severity": "warning" - } - ] - }, - { - "annotation": [ - { - "key": "summary", - "value": "foo.+", - "required": true, - "severity": "bug" - } - ] - } - ], "owners": {} } --- -[TestGetChecksForRule/reject_rules - 1] +[TestGetChecksForRule/single_empty_rule - 1] { "ci": { "maxCommits": 20, @@ -489,30 +480,13 @@ ] }, "rules": [ - { - "reject": [ - { - "key": "http://.+", - "label_keys": true, - "label_values": true - }, - { - "key": ".* +.*", - "label_keys": true, - "annotation_keys": true - }, - { - "annotation_values": true, - "severity": "bug" - } - ] - } + {} ], "owners": {} } --- -[TestGetChecksForRule/rule_with_label_match_/_type_mismatch - 1] +[TestGetChecksForRule/rule_with_aggregate_checks - 1] { "ci": { "maxCommits": 20, @@ -545,20 +519,20 @@ }, "rules": [ { - "match": [ + "aggregate": [ { - "kind": "alerting", - "label": { - "key": "cluster", - "value": "prod" - } - } - ], - "label": [ + "name": ".+", + "keep": [ + "job" + ], + "severity": "bug" + }, { - "key": "priority", - "value": "(1|2|3|4|5)", - "required": true, + "name": ".+", + "strip": [ + "instance", + "rack" + ], "severity": "bug" } ] @@ -568,7 +542,7 @@ } --- -[TestGetChecksForRule/rule_with_label_match_/_no_label - 1] +[TestGetChecksForRule/multiple_checks_and_disable_comment - 1] { "ci": { "maxCommits": 20, @@ -601,20 +575,20 @@ }, "rules": [ { - "match": [ + "aggregate": [ { - "kind": "alerting", - "label": { - "key": "cluster", - "value": "prod" - } - } - ], - "label": [ + "name": ".+", + "keep": [ + "job" + ], + "severity": "bug" + }, { - "key": "priority", - "value": "(1|2|3|4|5)", - "required": true, + "name": ".+", + "strip": [ + "instance", + "rack" + ], "severity": "bug" } ] @@ -624,7 +598,7 @@ } --- -[TestGetChecksForRule/rule_with_label_match_/_label_mismatch - 1] 
+[TestGetChecksForRule/prometheus_check_without_prometheus_server - 1] { "ci": { "maxCommits": 20, @@ -657,36 +631,49 @@ }, "rules": [ { - "match": [ - { - "kind": "alerting", - "label": { - "key": "cluster", - "value": "prod" - } - } - ], - "label": [ - { - "key": "priority", - "value": "(1|2|3|4|5)", - "required": true, - "severity": "bug" - } - ] + "cost": { + "maxSeries": 10000, + "severity": "warning" + } } ], "owners": {} } --- -[TestGetChecksForRule/rule_with_label_match_/_label_match - 1] +[TestGetChecksForRule/prometheus_check_with_prometheus_servers_and_disable_comment - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, + "prometheus": [ + { + "name": "prom1", + "uri": "http://localhost", + "timeout": "1s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "include": [ + "rules.yml" + ], + "required": false + }, + { + "name": "prom2", + "uri": "http://localhost", + "timeout": "1s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "include": [ + "rules.yml" + ], + "required": false + } + ], "checks": { "enabled": [ "alerts/annotation", @@ -713,30 +700,14 @@ }, "rules": [ { - "match": [ - { - "kind": "alerting", - "label": { - "key": "cluster", - "value": "prod" - } - } - ], - "label": [ - { - "key": "priority", - "value": "(1|2|3|4|5)", - "required": true, - "severity": "bug" - } - ] + "cost": {} } ], "owners": {} } --- -[TestGetChecksForRule/rule_with_annotation_match_/_no_annotation - 1] +[TestGetChecksForRule/duplicated_rules - 1] { "ci": { "maxCommits": 20, @@ -769,19 +740,43 @@ }, "rules": [ { - "match": [ + "label": [ { - "kind": "alerting", - "annotation": { - "key": "cluster", - "value": "prod" - } + "key": "team", + "required": true, + "severity": "bug" + } + ] + }, + { + "annotation": [ + { + "key": "summary", + "required": true, + "severity": "bug" + } + ] + }, + { + "annotation": [ + { + "key": "summary", + "required": true, + "severity": "bug" } ], "label": [ { - "key": "priority", - "value": 
"(1|2|3|4|5)", + "key": "team", + "severity": "warning" + } + ] + }, + { + "annotation": [ + { + "key": "summary", + "value": "foo.+", "required": true, "severity": "bug" } @@ -792,13 +787,39 @@ } --- -[TestGetChecksForRule/rule_with_annotation_match_/_annotation_mismatch - 1] +[TestGetChecksForRule/multiple_cost_checks - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, + "prometheus": [ + { + "name": "prom1", + "uri": "http://localhost", + "timeout": "1s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "include": [ + "rules.yml" + ], + "required": false + }, + { + "name": "prom2", + "uri": "http://localhost", + "timeout": "1s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "include": [ + "rules.yml" + ], + "required": false + } + ], "checks": { "enabled": [ "alerts/annotation", @@ -825,30 +846,28 @@ }, "rules": [ { - "match": [ - { - "kind": "alerting", - "annotation": { - "key": "cluster", - "value": "prod" - } - } - ], - "label": [ - { - "key": "priority", - "value": "(1|2|3|4|5)", - "required": true, - "severity": "bug" - } - ] + "cost": { + "severity": "info" + } + }, + { + "cost": { + "maxSeries": 10000, + "severity": "warning" + } + }, + { + "cost": { + "maxSeries": 20000, + "severity": "bug" + } } ], "owners": {} } --- -[TestGetChecksForRule/rule_with_annotation_match_/_annotation_match - 1] +[TestGetChecksForRule/reject_rules - 1] { "ci": { "maxCommits": 20, @@ -881,20 +900,19 @@ }, "rules": [ { - "match": [ + "reject": [ { - "kind": "alerting", - "annotation": { - "key": "cluster", - "value": "prod" - } - } - ], - "label": [ + "key": "http://.+", + "label_keys": true, + "label_values": true + }, { - "key": "priority", - "value": "(1|2|3|4|5)", - "required": true, + "key": ".* +.*", + "label_keys": true, + "annotation_keys": true + }, + { + "annotation_values": true, "severity": "bug" } ] @@ -904,137 +922,7 @@ } --- -[TestGetChecksForRule/two_checks_enabled_via_config - 1] -{ - "ci": { - "maxCommits": 20, 
- "baseBranch": "master" - }, - "parser": {}, - "prometheus": [ - { - "name": "prom1", - "uri": "http://localhost", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "include": [ - "rules.yml" - ], - "required": false - } - ], - "checks": { - "enabled": [ - "promql/syntax", - "alerts/count" - ] - }, - "rules": [ - { - "alerts": { - "range": "1h", - "step": "1m", - "resolve": "5m" - } - } - ], - "owners": {} -} ---- - -[TestGetChecksForRule/rule_with_ignore_block_/_mismatch - 1] -{ - "ci": { - "maxCommits": 20, - "baseBranch": "master" - }, - "parser": {}, - "prometheus": [ - { - "name": "prom1", - "uri": "http://localhost", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "include": [ - "rules.yml" - ], - "required": false - } - ], - "checks": { - "enabled": [ - "promql/syntax", - "alerts/count" - ] - }, - "rules": [ - { - "ignore": [ - { - "path": "foo.xml" - } - ], - "alerts": { - "range": "1h", - "step": "1m", - "resolve": "5m" - } - } - ], - "owners": {} -} ---- - -[TestGetChecksForRule/rule_with_ignore_block_/_match - 1] -{ - "ci": { - "maxCommits": 20, - "baseBranch": "master" - }, - "parser": {}, - "prometheus": [ - { - "name": "prom1", - "uri": "http://localhost", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "include": [ - "rules.yml" - ], - "required": false - } - ], - "checks": { - "enabled": [ - "promql/syntax", - "alerts/count" - ] - }, - "rules": [ - { - "ignore": [ - { - "path": "rules.yml" - } - ], - "alerts": { - "range": "1h", - "step": "1m", - "resolve": "5m" - } - } - ], - "owners": {} -} ---- - -[TestGetChecksForRule/for_match_/_passing - 1] +[TestGetChecksForRule/rule_with_label_match_/_type_mismatch - 1] { "ci": { "maxCommits": 20, @@ -1069,13 +957,19 @@ { "match": [ { - "for": "\u003e 15m" + "kind": "alerting", + "label": { + "key": "cluster", + "value": "prod" + } } ], - "annotation": [ + "label": [ { - "key": "summary", - "required": true + 
"key": "priority", + "value": "(1|2|3|4|5)", + "required": true, + "severity": "bug" } ] } @@ -1084,7 +978,7 @@ } --- -[TestGetChecksForRule/for_match_/_not_passing - 1] +[TestGetChecksForRule/rule_with_label_match_/_no_label - 1] { "ci": { "maxCommits": 20, @@ -1119,13 +1013,19 @@ { "match": [ { - "for": "\u003e 15m" + "kind": "alerting", + "label": { + "key": "cluster", + "value": "prod" + } } ], - "annotation": [ + "label": [ { - "key": "summary", - "required": true + "key": "priority", + "value": "(1|2|3|4|5)", + "required": true, + "severity": "bug" } ] } @@ -1134,7 +1034,7 @@ } --- -[TestGetChecksForRule/for_match_/_recording_rules_/_not_passing - 1] +[TestGetChecksForRule/rule_with_label_match_/_label_mismatch - 1] { "ci": { "maxCommits": 20, @@ -1169,13 +1069,19 @@ { "match": [ { - "for": "!= 15m" + "kind": "alerting", + "label": { + "key": "cluster", + "value": "prod" + } } ], - "annotation": [ + "label": [ { - "key": "summary", - "required": true + "key": "priority", + "value": "(1|2|3|4|5)", + "required": true, + "severity": "bug" } ] } @@ -1184,7 +1090,7 @@ } --- -[TestGetChecksForRule/for_ignore_/_passing - 1] +[TestGetChecksForRule/rule_with_label_match_/_label_match - 1] { "ci": { "maxCommits": 20, @@ -1217,15 +1123,21 @@ }, "rules": [ { - "ignore": [ + "match": [ { - "for": "\u003c 15m" + "kind": "alerting", + "label": { + "key": "cluster", + "value": "prod" + } } ], - "annotation": [ + "label": [ { - "key": "summary", - "required": true + "key": "priority", + "value": "(1|2|3|4|5)", + "required": true, + "severity": "bug" } ] } @@ -1234,7 +1146,7 @@ } --- -[TestGetChecksForRule/for_ignore_/_not_passing - 1] +[TestGetChecksForRule/rule_with_annotation_match_/_no_annotation - 1] { "ci": { "maxCommits": 20, @@ -1267,15 +1179,21 @@ }, "rules": [ { - "ignore": [ + "match": [ { - "for": "\u003c 15m" + "kind": "alerting", + "annotation": { + "key": "cluster", + "value": "prod" + } } ], - "annotation": [ + "label": [ { - "key": "summary", - "required": 
true + "key": "priority", + "value": "(1|2|3|4|5)", + "required": true, + "severity": "bug" } ] } @@ -1284,7 +1202,7 @@ } --- -[TestGetChecksForRule/for_ignore_/_recording_rules_/_passing - 1] +[TestGetChecksForRule/rule_with_annotation_match_/_annotation_mismatch - 1] { "ci": { "maxCommits": 20, @@ -1317,15 +1235,21 @@ }, "rules": [ { - "ignore": [ + "match": [ { - "for": "\u003e 0" + "kind": "alerting", + "annotation": { + "key": "cluster", + "value": "prod" + } } ], - "annotation": [ + "label": [ { - "key": "summary", - "required": true + "key": "priority", + "value": "(1|2|3|4|5)", + "required": true, + "severity": "bug" } ] } @@ -1334,7 +1258,7 @@ } --- -[TestGetChecksForRule/link - 1] +[TestGetChecksForRule/rule_with_annotation_match_/_annotation_match - 1] { "ci": { "maxCommits": 20, @@ -1367,14 +1291,20 @@ }, "rules": [ { - "link": [ + "match": [ { - "key": "https?://(.+)", - "uri": "http://localhost/$1", - "timeout": "10s", - "headers": { - "X-Auth": "xxx" - }, + "kind": "alerting", + "annotation": { + "key": "cluster", + "value": "prod" + } + } + ], + "label": [ + { + "key": "priority", + "value": "(1|2|3|4|5)", + "required": true, "severity": "bug" } ] @@ -1384,7 +1314,7 @@ } --- -[TestGetChecksForRule/two_prometheus_servers_/_disable_all_checks_via_comment - 1] +[TestGetChecksForRule/checks_disabled_via_config - 1] { "ci": { "maxCommits": 20, @@ -1394,20 +1324,14 @@ "prometheus": [ { "name": "prom1", - "uri": "http://localhost/1", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "required": false - }, - { - "name": "prom2", - "uri": "http://localhost/2", + "uri": "http://localhost", "timeout": "1s", "concurrency": 16, "rateLimit": 100, "uptime": "up", + "include": [ + "rules.yml" + ], "required": false } ], @@ -1435,14 +1359,27 @@ "rule/reject" ], "disabled": [ - "alerts/template" + "promql/rate", + "promql/vector_matching", + "promql/range_query", + "rule/duplicate", + "labels/conflict" ] }, + "rules": [ + { + "alerts": { 
+ "range": "1h", + "step": "1m", + "resolve": "5m" + } + } + ], "owners": {} } --- -[TestGetChecksForRule/multiple_cost_checks - 1] +[TestGetChecksForRule/single_check_enabled_via_config - 1] { "ci": { "maxCommits": 20, @@ -1461,9 +1398,32 @@ "rules.yml" ], "required": false - }, + } + ], + "checks": {}, + "rules": [ { - "name": "prom2", + "alerts": { + "range": "1h", + "step": "1m", + "resolve": "5m" + } + } + ], + "owners": {} +} +--- + +[TestGetChecksForRule/two_checks_enabled_via_config - 1] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, + "prometheus": [ + { + "name": "prom1", "uri": "http://localhost", "timeout": "1s", "concurrency": 16, @@ -1477,44 +1437,61 @@ ], "checks": { "enabled": [ - "alerts/annotation", - "alerts/count", - "alerts/for", - "alerts/template", - "labels/conflict", - "promql/aggregate", - "alerts/comparison", - "promql/fragile", - "promql/range_query", - "promql/rate", - "promql/regexp", "promql/syntax", - "promql/vector_matching", - "query/cost", - "promql/series", - "rule/duplicate", - "rule/for", - "rule/label", - "rule/link", - "rule/reject" + "alerts/count" ] }, "rules": [ { - "cost": { - "severity": "info" + "alerts": { + "range": "1h", + "step": "1m", + "resolve": "5m" } - }, + } + ], + "owners": {} +} +--- + +[TestGetChecksForRule/rule_with_ignore_block_/_mismatch - 1] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, + "prometheus": [ { - "cost": { - "maxSeries": 10000, - "severity": "warning" - } - }, + "name": "prom1", + "uri": "http://localhost", + "timeout": "1s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "include": [ + "rules.yml" + ], + "required": false + } + ], + "checks": { + "enabled": [ + "promql/syntax", + "alerts/count" + ] + }, + "rules": [ { - "cost": { - "maxSeries": 20000, - "severity": "bug" + "ignore": [ + { + "path": "foo.xml" + } + ], + "alerts": { + "range": "1h", + "step": "1m", + "resolve": "5m" } } ], @@ -1522,7 +1499,7 @@ } 
--- -[TestGetChecksForRule/checks_disabled_via_config - 1] +[TestGetChecksForRule/rule_with_ignore_block_/_match - 1] { "ci": { "maxCommits": 20, @@ -1543,6 +1520,37 @@ "required": false } ], + "checks": { + "enabled": [ + "promql/syntax", + "alerts/count" + ] + }, + "rules": [ + { + "ignore": [ + { + "path": "rules.yml" + } + ], + "alerts": { + "range": "1h", + "step": "1m", + "resolve": "5m" + } + } + ], + "owners": {} +} +--- + +[TestGetChecksForRule/for_match_/_passing - 1] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, "checks": { "enabled": [ "alerts/annotation", @@ -1565,55 +1573,34 @@ "rule/label", "rule/link", "rule/reject" - ], - "disabled": [ - "promql/rate", - "promql/vector_matching", - "promql/range_query", - "rule/duplicate", - "labels/conflict" ] }, "rules": [ { - "alerts": { - "range": "1h", - "step": "1m", - "resolve": "5m" - } + "match": [ + { + "for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] } ], "owners": {} } --- -[TestGetChecksForRule/two_prometheus_servers_/_disable_checks_via_file/disable_comment - 1] +[TestGetChecksForRule/for_match_/_not_passing - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, - "prometheus": [ - { - "name": "prom1", - "uri": "http://localhost/1", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "required": false - }, - { - "name": "prom2", - "uri": "http://localhost/2", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "required": false - } - ], "checks": { "enabled": [ "alerts/annotation", @@ -1636,68 +1623,84 @@ "rule/label", "rule/link", "rule/reject" - ], - "disabled": [ - "alerts/template" ] }, + "rules": [ + { + "match": [ + { + "for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], "owners": {} } --- -[TestGetChecksForRule/single_check_enabled_via_config - 1] 
+[TestGetChecksForRule/for_match_/_passing#01 - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, - "prometheus": [ - { - "name": "prom1", - "uri": "http://localhost", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "include": [ - "rules.yml" - ], - "required": false - } - ], - "checks": {}, + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + "promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, "rules": [ { - "alerts": { - "range": "1h", - "step": "1m", - "resolve": "5m" - } + "match": [ + { + "keep_firing_for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] } ], "owners": {} } --- -[TestGetChecksForRule/single_prometheus_server - 1] +[TestGetChecksForRule/for_match_/_passing#02 - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, - "prometheus": [ - { - "name": "prom", - "uri": "http://localhost", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "required": false - } - ], "checks": { "enabled": [ "alerts/annotation", @@ -1722,31 +1725,82 @@ "rule/reject" ] }, + "rules": [ + { + "match": [ + { + "keep_firing_for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], "owners": {} } --- -[TestGetChecksForRule/single_prometheus_server_/_path_match - 1] +[TestGetChecksForRule/for_match_/_passing#03 - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, - "prometheus": [ + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + 
"promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, + "rules": [ { - "name": "prom", - "uri": "http://localhost", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "include": [ - "rules.yml" + "match": [ + { + "keep_firing_for": "\u003e 15m" + } ], - "required": false + "annotation": [ + { + "key": "summary", + "required": true + } + ] } ], + "owners": {} +} +--- + +[TestGetChecksForRule/for_match_/_recording_rules_/_not_passing - 1] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, "checks": { "enabled": [ "alerts/annotation", @@ -1771,43 +1825,82 @@ "rule/reject" ] }, + "rules": [ + { + "match": [ + { + "for": "!= 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], "owners": {} } --- -[TestGetChecksForRule/multiple_prometheus_servers - 1] +[TestGetChecksForRule/for_ignore_/_passing - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, - "prometheus": [ - { - "name": "prom", - "uri": "http://localhost", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "include": [ - "rules.yml" - ], - "required": false - }, + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + "promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, + "rules": [ { - "name": "ignore", - "uri": "http://localhost", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "include": [ - "foo.+" + "ignore": [ + { + "for": "\u003c 
15m" + } ], - "required": false + "annotation": [ + { + "key": "summary", + "required": true + } + ] } ], + "owners": {} +} +--- + +[TestGetChecksForRule/for_ignore_/_not_passing - 1] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, "checks": { "enabled": [ "alerts/annotation", @@ -1832,32 +1925,32 @@ "rule/reject" ] }, + "rules": [ + { + "ignore": [ + { + "for": "\u003c 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], "owners": {} } --- -[TestGetChecksForRule/multiple_URIs - 1] +[TestGetChecksForRule/for_ignore_/_recording_rules_/_passing - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, - "prometheus": [ - { - "name": "prom", - "uri": "http://localhost", - "failover": [ - "http://localhost/1", - "http://localhost/2" - ], - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "required": false - } - ], "checks": { "enabled": [ "alerts/annotation", @@ -1882,43 +1975,32 @@ "rule/reject" ] }, + "rules": [ + { + "ignore": [ + { + "for": "\u003e 0" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], "owners": {} } --- -[TestGetChecksForRule/prometheus_check_with_prometheus_servers_and_disable_comment - 1] +[TestGetChecksForRule/link - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, - "prometheus": [ - { - "name": "prom1", - "uri": "http://localhost", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "include": [ - "rules.yml" - ], - "required": false - }, - { - "name": "prom2", - "uri": "http://localhost", - "timeout": "1s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "include": [ - "rules.yml" - ], - "required": false - } - ], "checks": { "enabled": [ "alerts/annotation", @@ -1945,14 +2027,24 @@ }, "rules": [ { - "cost": {} + "link": [ + { + "key": "https?://(.+)", + "uri": "http://localhost/$1", + "timeout": "10s", + "headers": { + "X-Auth": 
"xxx" + }, + "severity": "bug" + } + ] } ], "owners": {} } --- -[TestGetChecksForRule/two_prometheus_servers_/_snoozed_checks_via_comment - 1] +[TestGetChecksForRule/two_prometheus_servers_/_disable_checks_via_file/disable_comment - 1] { "ci": { "maxCommits": 20, @@ -2003,15 +2095,14 @@ "rule/reject" ], "disabled": [ - "alerts/template", - "promql/regexp" + "alerts/template" ] }, "owners": {} } --- -[TestGetChecksForRule/tag_disables_all_prometheus_checks - 1] +[TestGetChecksForRule/two_prometheus_servers_/_snoozed_checks_via_comment - 1] { "ci": { "maxCommits": 20, @@ -2022,36 +2113,19 @@ { "name": "prom1", "uri": "http://localhost/1", - "timeout": "2m0s", + "timeout": "1s", "concurrency": 16, "rateLimit": 100, "uptime": "up", - "tags": [ - "foo", - "disable", - "bar" - ], "required": false }, { "name": "prom2", "uri": "http://localhost/2", - "timeout": "2m0s", - "concurrency": 16, - "rateLimit": 100, - "uptime": "up", - "required": false - }, - { - "name": "prom3", - "uri": "http://localhost/3", - "timeout": "2m0s", + "timeout": "1s", "concurrency": 16, "rateLimit": 100, "uptime": "up", - "tags": [ - "foo" - ], "required": false } ], @@ -2077,6 +2151,10 @@ "rule/label", "rule/link", "rule/reject" + ], + "disabled": [ + "alerts/template", + "promql/regexp" ] }, "owners": {} @@ -2142,7 +2220,7 @@ } --- -[TestGetChecksForRule/tag_snoozes_all_prometheus_checks - 1] +[TestGetChecksForRule/tag_disables_all_prometheus_checks - 1] { "ci": { "maxCommits": 20, @@ -2214,113 +2292,50 @@ } --- -[TestGetChecksForRule/for_match_/_passing#01 - 1] +[TestGetChecksForRule/tag_snoozes_all_prometheus_checks - 1] { "ci": { "maxCommits": 20, "baseBranch": "master" }, "parser": {}, - "checks": { - "enabled": [ - "alerts/annotation", - "alerts/count", - "alerts/for", - "alerts/template", - "labels/conflict", - "promql/aggregate", - "alerts/comparison", - "promql/fragile", - "promql/range_query", - "promql/rate", - "promql/regexp", - "promql/syntax", - "promql/vector_matching", - 
"query/cost", - "promql/series", - "rule/duplicate", - "rule/for", - "rule/label", - "rule/link", - "rule/reject" - ] - }, - "rules": [ + "prometheus": [ { - "match": [ - { - "keep_firing_for": "\u003e 15m" - } + "name": "prom1", + "uri": "http://localhost/1", + "timeout": "2m0s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "tags": [ + "foo", + "disable", + "bar" ], - "annotation": [ - { - "key": "summary", - "required": true - } - ] - } - ], - "owners": {} -} ---- - -[TestGetChecksForRule/for_match_/_passing#02 - 1] -{ - "ci": { - "maxCommits": 20, - "baseBranch": "master" - }, - "parser": {}, - "checks": { - "enabled": [ - "alerts/annotation", - "alerts/count", - "alerts/for", - "alerts/template", - "labels/conflict", - "promql/aggregate", - "alerts/comparison", - "promql/fragile", - "promql/range_query", - "promql/rate", - "promql/regexp", - "promql/syntax", - "promql/vector_matching", - "query/cost", - "promql/series", - "rule/duplicate", - "rule/for", - "rule/label", - "rule/link", - "rule/reject" - ] - }, - "rules": [ + "required": false + }, { - "match": [ - { - "keep_firing_for": "\u003e 15m" - } + "name": "prom2", + "uri": "http://localhost/2", + "timeout": "2m0s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "required": false + }, + { + "name": "prom3", + "uri": "http://localhost/3", + "timeout": "2m0s", + "concurrency": 16, + "rateLimit": 100, + "uptime": "up", + "tags": [ + "foo" ], - "annotation": [ - { - "key": "summary", - "required": true - } - ] + "required": false } ], - "owners": {} -} ---- - -[TestGetChecksForRule/for_match_/_passing#03 - 1] -{ - "ci": { - "maxCommits": 20, - "baseBranch": "master" - }, - "parser": {}, "checks": { "enabled": [ "alerts/annotation", @@ -2345,21 +2360,6 @@ "rule/reject" ] }, - "rules": [ - { - "match": [ - { - "keep_firing_for": "\u003e 15m" - } - ], - "annotation": [ - { - "key": "summary", - "required": true - } - ] - } - ], "owners": {} } --- diff --git 
a/internal/config/config.go b/internal/config/config.go index fabacefc..ee5f6219 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -2,12 +2,10 @@ package config import ( "context" - "crypto/tls" "encoding/json" "fmt" "log/slog" "os" - "regexp" "strings" "time" @@ -17,7 +15,6 @@ import ( "github.com/cloudflare/pint/internal/checks" "github.com/cloudflare/pint/internal/parser" - "github.com/cloudflare/pint/internal/promapi" "github.com/hashicorp/hcl/v2" "github.com/hashicorp/hcl/v2/hclsimple" @@ -25,15 +22,15 @@ import ( ) type Config struct { - CI *CI `hcl:"ci,block" json:"ci,omitempty"` - Parser *Parser `hcl:"parser,block" json:"parser,omitempty"` - Repository *Repository `hcl:"repository,block" json:"repository,omitempty"` - Prometheus []PrometheusConfig `hcl:"prometheus,block" json:"prometheus,omitempty"` - Checks *Checks `hcl:"checks,block" json:"checks,omitempty"` - Check []Check `hcl:"check,block" json:"check,omitempty"` - Rules []Rule `hcl:"rule,block" json:"rules,omitempty"` - Owners *Owners `hcl:"owners,block" json:"owners,omitempty"` - PrometheusServers []*promapi.FailoverGroup `json:"-"` + CI *CI `hcl:"ci,block" json:"ci,omitempty"` + Parser *Parser `hcl:"parser,block" json:"parser,omitempty"` + Repository *Repository `hcl:"repository,block" json:"repository,omitempty"` + Prometheus []PrometheusConfig `hcl:"prometheus,block" json:"prometheus,omitempty"` + Discovery *Discovery `hcl:"discovery,block" json:"discovery,omitempty"` + Checks *Checks `hcl:"checks,block" json:"checks,omitempty"` + Check []Check `hcl:"check,block" json:"check,omitempty"` + Rules []Rule `hcl:"rule,block" json:"rules,omitempty"` + Owners *Owners `hcl:"owners,block" json:"owners,omitempty"` } func (cfg *Config) DisableOnlineChecks() { @@ -83,7 +80,7 @@ func (cfg Config) String() string { return string(content) } -func (cfg *Config) GetChecksForRule(ctx context.Context, path string, r parser.Rule, disabledChecks []string) []checks.RuleChecker { +func (cfg 
*Config) GetChecksForRule(ctx context.Context, gen *PrometheusGenerator, path string, r parser.Rule, disabledChecks []string) []checks.RuleChecker { enabled := []checks.RuleChecker{} allChecks := []checkMeta{ @@ -113,18 +110,7 @@ func (cfg *Config) GetChecksForRule(ctx context.Context, path string, r parser.R }, } - proms := []*promapi.FailoverGroup{} - for _, prom := range cfg.Prometheus { - if !prom.isEnabledForPath(path) { - continue - } - for _, p := range cfg.PrometheusServers { - if p.Name() == prom.Name { - proms = append(proms, p) - break - } - } - } + proms := gen.ServersForPath(path) for _, p := range proms { allChecks = append(allChecks, checkMeta{ @@ -296,51 +282,12 @@ func Load(path string, failOnMissing bool) (cfg Config, err error) { } promNames = append(promNames, prom.Name) - var timeout time.Duration - if prom.Timeout != "" { - timeout, _ = parseDuration(prom.Timeout) - } else { - timeout = time.Minute * 2 - cfg.Prometheus[i].Timeout = timeout.String() - } - - concurrency := prom.Concurrency - if concurrency <= 0 { - concurrency = 16 - cfg.Prometheus[i].Concurrency = concurrency - } + cfg.Prometheus[i].applyDefaults() - rateLimit := prom.RateLimit - if rateLimit <= 0 { - rateLimit = 100 - cfg.Prometheus[i].RateLimit = rateLimit - } - - uptime := prom.Uptime - if uptime == "" { - uptime = "up" - cfg.Prometheus[i].Uptime = uptime - } - - var tlsConf *tls.Config - tlsConf, err = prom.getTLSConfig() + _, err = prom.TLS.toHTTPConfig() if err != nil { return cfg, fmt.Errorf("invalid prometheus TLS configuration: %w", err) } - upstreams := []*promapi.Prometheus{ - promapi.NewPrometheus(prom.Name, prom.URI, prom.Headers, timeout, concurrency, rateLimit, tlsConf), - } - for _, uri := range prom.Failover { - upstreams = append(upstreams, promapi.NewPrometheus(prom.Name, uri, prom.Headers, timeout, concurrency, rateLimit, tlsConf)) - } - var include, exclude []*regexp.Regexp - for _, path := range prom.Include { - include = append(include, strictRegex(path)) 
- } - for _, path := range prom.Exclude { - exclude = append(exclude, strictRegex(path)) - } - cfg.PrometheusServers = append(cfg.PrometheusServers, promapi.NewFailoverGroup(prom.Name, upstreams, prom.Required, uptime, include, exclude, prom.Tags)) } for _, rule := range cfg.Rules { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index d091dbbc..9f332e9c 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -8,6 +8,7 @@ import ( "testing" "github.com/gkampitakis/go-snaps/snaps" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "github.com/cloudflare/pint/internal/checks" @@ -43,6 +44,11 @@ prometheus "prom" { cfg, err := config.Load(path, true) require.NoError(t, err) + + gen := config.NewPrometheusGenerator(cfg, prometheus.NewRegistry()) + defer gen.Stop() + require.NoError(t, gen.Discover(context.Background())) + require.Empty(t, cfg.Checks.Disabled) cfg.DisableOnlineChecks() @@ -59,6 +65,11 @@ func TestDisableOnlineChecksWithoutPrometheus(t *testing.T) { cfg, err := config.Load(path, true) require.NoError(t, err) + + gen := config.NewPrometheusGenerator(cfg, prometheus.NewRegistry()) + defer gen.Stop() + require.NoError(t, gen.Discover(context.Background())) + require.Empty(t, cfg.Checks.Disabled) cfg.DisableOnlineChecks() @@ -80,6 +91,11 @@ prometheus "prom" { cfg, err := config.Load(path, true) require.NoError(t, err) + + gen := config.NewPrometheusGenerator(cfg, prometheus.NewRegistry()) + defer gen.Stop() + require.NoError(t, gen.Discover(context.Background())) + require.Empty(t, cfg.Checks.Disabled) cfg.SetDisabledChecks([]string{checks.SyntaxCheckName}) @@ -102,6 +118,11 @@ func TestSetDisabledChecks(t *testing.T) { cfg, err := config.Load(path, true) require.NoError(t, err) + + gen := config.NewPrometheusGenerator(cfg, prometheus.NewRegistry()) + defer gen.Stop() + require.NoError(t, gen.Discover(context.Background())) + require.Empty(t, cfg.Checks.Disabled) 
cfg.SetDisabledChecks([]string{checks.SyntaxCheckName}) @@ -1458,7 +1479,11 @@ prometheus "prom3" { cfg, err := config.Load(path, false) require.NoError(t, err) - checks := cfg.GetChecksForRule(ctx, tc.path, tc.rule, tc.disabledChecks) + gen := config.NewPrometheusGenerator(cfg, prometheus.NewRegistry()) + defer gen.Stop() + require.NoError(t, gen.Discover(ctx)) + + checks := cfg.GetChecksForRule(ctx, gen, tc.path, tc.rule, tc.disabledChecks) checkNames := make([]string, 0, len(checks)) for _, c := range checks { checkNames = append(checkNames, c.String()) diff --git a/internal/config/discovery.go b/internal/config/discovery.go new file mode 100644 index 00000000..4bc1b100 --- /dev/null +++ b/internal/config/discovery.go @@ -0,0 +1,253 @@ +package config + +import ( + "context" + "fmt" + "io/fs" + "log/slog" + "path/filepath" + "regexp" + "slices" + "strings" + "time" + + "github.com/cloudflare/pint/internal/promapi" +) + +type Discoverer interface { + Discover(_ context.Context) ([]*promapi.FailoverGroup, error) +} + +func isEqualFailoverGroup(a, b *promapi.FailoverGroup) bool { + if a.Name() != b.Name() { + return false + } + if a.UptimeMetric() != b.UptimeMetric() { + return false + } + if !slices.Equal(a.Tags(), b.Tags()) { + return false + } + return true +} + +type Discovery struct { + FilePath []FilePath `hcl:"filepath,block" json:"filepath,omitempty"` + PrometheusQuery []PrometheusQuery `hcl:"prometheusQuery,block" json:"prometheusQuery,omitempty"` +} + +func (d *Discovery) discover(ctx context.Context, pd Discoverer, servers []*promapi.FailoverGroup) ([]*promapi.FailoverGroup, error) { + ds, err := pd.Discover(ctx) + if err != nil { + return nil, err + } + return d.merge(servers, ds) +} + +func (d *Discovery) Discover(ctx context.Context) ([]*promapi.FailoverGroup, error) { + var err error + servers := []*promapi.FailoverGroup{} + for _, pd := range d.FilePath { + servers, err = d.discover(ctx, pd, servers) + if err != nil { + return nil, err + } + } + for 
_, pd := range d.PrometheusQuery { + servers, err = d.discover(ctx, pd, servers) + if err != nil { + return nil, err + } + } + return servers, nil +} + +func (d *Discovery) merge(dst, src []*promapi.FailoverGroup) ([]*promapi.FailoverGroup, error) { + for _, ns := range src { + var found bool + for _, ol := range dst { + if isEqualFailoverGroup(ns, ol) { + found = true + ol.MergeUpstreams(ns) + } + } + if !found { + dst = append(dst, ns) + } + } + return dst, nil +} + +type PrometheusTemplate struct { + Name string `hcl:"name" json:"name"` + URI string `hcl:"uri" json:"uri"` + Headers map[string]string `hcl:"headers,optional" json:"headers,omitempty"` + Failover []string `hcl:"failover,optional" json:"failover,omitempty"` + Timeout string `hcl:"timeout,optional" json:"timeout"` + Concurrency int `hcl:"concurrency,optional" json:"concurrency"` + RateLimit int `hcl:"rateLimit,optional" json:"rateLimit"` + Uptime string `hcl:"uptime,optional" json:"uptime"` + Include []string `hcl:"include,optional" json:"include,omitempty"` + Exclude []string `hcl:"exclude,optional" json:"exclude,omitempty"` + Tags []string `hcl:"tags,optional" json:"tags,omitempty"` + Required bool `hcl:"required,optional" json:"required"` + TLS *TLSConfig `hcl:"tls,block" json:"tls,omitempty"` +} + +func (pt *PrometheusTemplate) Render(re *regexp.Regexp, path string) (*promapi.FailoverGroup, error) { + failover := make([]string, 0, len(pt.Failover)) + for _, f := range pt.Failover { + failover = append(failover, re.ReplaceAllString(path, f)) + } + + headerNames := make([]string, 0, len(pt.Headers)) + headers := make(map[string]string, len(pt.Headers)) + for k, v := range pt.Headers { + name := re.ReplaceAllString(path, k) + headerNames = append(headerNames, name) + headers[name] = re.ReplaceAllString(path, v) + } + + tags := make([]string, 0, len(pt.Tags)) + for _, t := range pt.Tags { + tags = append(tags, re.ReplaceAllString(path, t)) + } + + prom := PrometheusConfig{ + Name: 
re.ReplaceAllString(path, pt.Name), + URI: re.ReplaceAllString(path, pt.URI), + Headers: headers, + Failover: failover, + Timeout: pt.Timeout, + Concurrency: pt.Concurrency, + RateLimit: pt.RateLimit, + Uptime: pt.Uptime, + Include: pt.Include, + Exclude: pt.Exclude, + Tags: tags, + Required: pt.Required, + TLS: pt.TLS, + } + prom.applyDefaults() + slog.Debug( + "Rendered Prometheus server", + slog.String("name", prom.Name), + slog.String("uri", prom.URI), + slog.Any("headers", headerNames), + slog.String("timeout", prom.Timeout), + slog.Int("concurrency", prom.Concurrency), + slog.Int("rateLimit", prom.RateLimit), + slog.String("uptime", prom.Uptime), + slog.Any("tags", prom.Tags), + slog.Bool("required", prom.Required), + ) + + return newFailoverGroup(prom), nil +} + +type FilePath struct { + Directory string `hcl:"directory" json:"directory"` + Match string `hcl:"match" json:"match"` + Ignore []string `hcl:"ignore,optional" json:"ignore,omitempty"` + Template []PrometheusTemplate `hcl:"template,block" json:"template"` +} + +func (fp FilePath) isIgnored(path string) bool { + for _, pattern := range fp.Ignore { + if strictRegex(pattern).MatchString(path) { + return true + } + } + return false +} + +func (fp FilePath) Discover(_ context.Context) ([]*promapi.FailoverGroup, error) { + re := strictRegex(fp.Match) + servers := []*promapi.FailoverGroup{} + slog.Info( + "Finding Prometheus servers using file paths", + slog.String("dir", fp.Directory), + slog.String("match", re.String()), + ) + err := filepath.WalkDir(fp.Directory, + func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + path, err = filepath.Rel(fp.Directory, path) + if err != nil { + return err + } + if fp.isIgnored(path) { + return nil + } + if re.MatchString(path) { + slog.Debug( + "Path discovery match", + slog.String("match", re.String()), + slog.String("path", path), + ) + for _, t := range fp.Template { + server, err := t.Render(re, path) + if err != nil { + return 
fmt.Errorf("Failed to generate Prometheus config from a template: %w", err) + } + servers = append(servers, server) + } + } + return nil + }) + + return servers, err +} + +type PrometheusQuery struct { + URI string `hcl:"uri" json:"uri"` + Headers map[string]string `hcl:"headers,optional" json:"headers,omitempty"` + Timeout string `hcl:"timeout,optional" json:"timeout"` + TLS *TLSConfig `hcl:"tls,block" json:"tls,omitempty"` + Query string `hcl:"query" json:"query"` + Template []PrometheusTemplate `hcl:"template,block" json:"template"` +} + +func (pq PrometheusQuery) Discover(ctx context.Context) ([]*promapi.FailoverGroup, error) { + if pq.Timeout == "" { + pq.Timeout = (time.Minute * 2).String() + } + + timeout, _ := parseDuration(pq.Timeout) + tls, _ := pq.TLS.toHTTPConfig() + + prom := promapi.NewPrometheus("discovery", pq.URI, pq.Headers, timeout, 1, 100, tls) + prom.StartWorkers() + defer prom.Close() + + slog.Info( + "Finding Prometheus servers using Prometheus API query", + slog.String("uri", prom.SafeURI()), + slog.String("query", pq.Query), + ) + res, err := prom.Query(ctx, pq.Query) + if err != nil { + return nil, fmt.Errorf("failed to execute Prometheus discovery query: %w", err) + } + + servers := []*promapi.FailoverGroup{} + for _, s := range res.Series { + var reBuilder, metBuilder strings.Builder + for _, l := range s.Labels { + reBuilder.WriteString(fmt.Sprintf("%s=\"(?P<%s>\\w+)\" ", l.Name, l.Name)) + metBuilder.WriteString(fmt.Sprintf("%s=%q ", l.Name, l.Value)) + } + re := strictRegex(reBuilder.String()) + for _, t := range pq.Template { + server, err := t.Render(re, metBuilder.String()) + if err != nil { + return nil, fmt.Errorf("Failed to generate Prometheus config from a template: %w", err) + } + servers = append(servers, server) + } + } + + return servers, nil +} diff --git a/internal/config/prometheus.go b/internal/config/prometheus.go index 1de4720b..35f534f3 100644 --- a/internal/config/prometheus.go +++ b/internal/config/prometheus.go @@ 
-1,14 +1,21 @@ package config import ( + "context" "crypto/tls" "crypto/x509" "errors" "fmt" "go/parser" + "log/slog" "os" "regexp" "strings" + "time" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/cloudflare/pint/internal/promapi" ) type TLSConfig struct { @@ -19,6 +26,50 @@ type TLSConfig struct { InsecureSkipVerify bool `hcl:"skipVerify,optional" json:"skipVerify,omitempty"` } +func (t *TLSConfig) toHTTPConfig() (*tls.Config, error) { + if t == nil { + return nil, nil + } + + var isDirty bool + cfg := &tls.Config{} + + if t.ServerName != "" { + cfg.ServerName = t.ServerName + isDirty = true + } + + if t.CaCert != "" { + caCert, err := os.ReadFile(t.CaCert) + if err != nil { + return nil, err + } + cfg.RootCAs = x509.NewCertPool() + cfg.RootCAs.AppendCertsFromPEM(caCert) + isDirty = true + } + + if t.ClientCert != "" && t.ClientKey != "" { + cert, err := tls.LoadX509KeyPair(t.ClientCert, t.ClientKey) + if err != nil { + return nil, err + } + cfg.Certificates = []tls.Certificate{cert} + isDirty = true + } + + if t.InsecureSkipVerify { + cfg.InsecureSkipVerify = true + isDirty = true + } + + if isDirty { + return cfg, nil + } + + return nil, nil +} + type PrometheusConfig struct { Name string `hcl:",label" json:"name"` URI string `hcl:"uri" json:"uri"` @@ -81,65 +132,119 @@ func (pc PrometheusConfig) validate() error { return nil } -func (pc PrometheusConfig) isEnabledForPath(path string) bool { - if len(pc.Include) == 0 && len(pc.Exclude) == 0 { - return true +func (pc *PrometheusConfig) applyDefaults() { + if pc.Timeout == "" { + pc.Timeout = (time.Minute * 2).String() } - for _, pattern := range pc.Exclude { - re := strictRegex(pattern) - if re.MatchString(path) { - return false - } + + if pc.Concurrency <= 0 { + pc.Concurrency = 16 } - for _, pattern := range pc.Include { - re := strictRegex(pattern) - if re.MatchString(path) { - return true - } + + if pc.RateLimit <= 0 { + pc.RateLimit = 100 + } + + if pc.Uptime == "" { + pc.Uptime = "up" 
} - return false } -func (pc PrometheusConfig) getTLSConfig() (*tls.Config, error) { - if pc.TLS == nil { - return nil, nil +func newFailoverGroup(prom PrometheusConfig) *promapi.FailoverGroup { + timeout, _ := parseDuration(prom.Timeout) + + var tlsConf *tls.Config + tlsConf, _ = prom.TLS.toHTTPConfig() + upstreams := []*promapi.Prometheus{ + promapi.NewPrometheus(prom.Name, prom.URI, prom.Headers, timeout, prom.Concurrency, prom.RateLimit, tlsConf), + } + for _, uri := range prom.Failover { + upstreams = append(upstreams, promapi.NewPrometheus(prom.Name, uri, prom.Headers, timeout, prom.Concurrency, prom.RateLimit, tlsConf)) } + include := make([]*regexp.Regexp, 0, len(prom.Include)) + for _, path := range prom.Include { + include = append(include, strictRegex(path)) + } + exclude := make([]*regexp.Regexp, 0, len(prom.Exclude)) + for _, path := range prom.Exclude { + exclude = append(exclude, strictRegex(path)) + } + return promapi.NewFailoverGroup(prom.Name, upstreams, prom.Required, prom.Uptime, include, exclude, prom.Tags) +} - var isDirty bool - cfg := &tls.Config{} +func NewPrometheusGenerator(cfg Config, metricsRegistry *prometheus.Registry) *PrometheusGenerator { + return &PrometheusGenerator{ + metricsRegistry: metricsRegistry, + cfg: cfg, + } +} - if pc.TLS.ServerName != "" { - cfg.ServerName = pc.TLS.ServerName - isDirty = true +type PrometheusGenerator struct { + servers []*promapi.FailoverGroup + metricsRegistry *prometheus.Registry + cfg Config +} + +func (pg *PrometheusGenerator) Count() int { + return len(pg.servers) +} + +func (pg *PrometheusGenerator) Stop() { + for _, server := range pg.servers { + server.Close(pg.metricsRegistry) } + pg.servers = nil +} - if pc.TLS.CaCert != "" { - caCert, err := os.ReadFile(pc.TLS.CaCert) - if err != nil { - return nil, err +func (pg *PrometheusGenerator) ServersForPath(path string) []*promapi.FailoverGroup { + var servers []*promapi.FailoverGroup + for _, server := range pg.servers { + if 
server.IsEnabledForPath(path) { + server.StartWorkers(pg.metricsRegistry) + servers = append(servers, server) } - cfg.RootCAs = x509.NewCertPool() - cfg.RootCAs.AppendCertsFromPEM(caCert) - isDirty = true } + return servers +} + +func (pg *PrometheusGenerator) addServer(server *promapi.FailoverGroup) error { + for _, s := range pg.servers { + if s.Name() == server.Name() { + return fmt.Errorf("Duplicated name for Prometheus server definition: %s", s.Name()) + } + } + pg.servers = append(pg.servers, server) + return nil +} - if pc.TLS.ClientCert != "" && pc.TLS.ClientKey != "" { - cert, err := tls.LoadX509KeyPair(pc.TLS.ClientCert, pc.TLS.ClientKey) +func (pg *PrometheusGenerator) Discover(ctx context.Context) (err error) { + for _, pc := range pg.cfg.Prometheus { + err = pg.addServer(newFailoverGroup(pc)) if err != nil { - return nil, err + return err } - cfg.Certificates = []tls.Certificate{cert} - isDirty = true } - if pc.TLS.InsecureSkipVerify { - cfg.InsecureSkipVerify = true - isDirty = true + if pg.cfg.Discovery != nil { + servers, err := pg.cfg.Discovery.Discover(ctx) + if err != nil { + return err + } + for _, server := range servers { + err = pg.addServer(server) + if err != nil { + return err + } + } } - if isDirty { - return cfg, nil + for _, server := range pg.servers { + slog.Info( + "Configured new Prometheus server", + slog.String("name", server.Name()), + slog.Int("uris", server.ServerCount()), + ) } - return nil, nil + return nil } diff --git a/internal/discovery/git_blame.go b/internal/discovery/git_blame.go index 88cc4b1c..46c7ede3 100644 --- a/internal/discovery/git_blame.go +++ b/internal/discovery/git_blame.go @@ -38,6 +38,8 @@ type GitBlameFinder struct { } func (f GitBlameFinder) Find() (entries []Entry, err error) { + slog.Info("Finding all rules to check on current git branch using git blame", slog.String("base", f.baseBranch)) + cr, err := git.CommitRange(f.gitCmd, f.baseBranch) if err != nil { return nil, fmt.Errorf("failed to get the 
list of commits to scan: %w", err) diff --git a/internal/discovery/git_branch.go b/internal/discovery/git_branch.go index 51d7105c..5b789560 100644 --- a/internal/discovery/git_branch.go +++ b/internal/discovery/git_branch.go @@ -42,6 +42,8 @@ type GitBranchFinder struct { } func (f GitBranchFinder) Find() (entries []Entry, err error) { + slog.Info("Finding all rules to check on current git branch using logical diff", slog.String("base", f.baseBranch)) + cr, err := git.CommitRange(f.gitCmd, f.baseBranch) if err != nil { return nil, fmt.Errorf("failed to get the list of commits to scan: %w", err) diff --git a/internal/discovery/glob.go b/internal/discovery/glob.go index b652a58c..f3c60463 100644 --- a/internal/discovery/glob.go +++ b/internal/discovery/glob.go @@ -3,6 +3,7 @@ package discovery import ( "fmt" "io/fs" + "log/slog" "os" "path/filepath" "regexp" @@ -21,6 +22,8 @@ type GlobFinder struct { } func (f GlobFinder) Find() (entries []Entry, err error) { + slog.Info("Finding all rules to check", slog.Any("paths", f.patterns)) + paths := filePaths{} for _, p := range f.patterns { matches, err := filepath.Glob(p) diff --git a/internal/log/handler.go b/internal/log/handler.go index 59079119..b218bcf6 100644 --- a/internal/log/handler.go +++ b/internal/log/handler.go @@ -118,12 +118,12 @@ func (h *handler) appendAttr(buf *bytes.Buffer, attr slog.Attr) { case slog.KindAny: switch attr.Value.Any().(type) { case error: - h.printVal(buf, attr.Value.String(), fgHiRed) + h.printVal(buf, formatString(attr), fgHiRed) default: h.printVal(buf, formatAny(attr), fgHiCyan) } case slog.KindString: - h.printVal(buf, attr.Value.String(), fgHiCyan) + h.printVal(buf, formatString(attr), fgHiCyan) default: h.printVal(buf, formatAny(attr), fgHiBlue) } @@ -136,3 +136,7 @@ func formatAny(attr slog.Attr) string { } return string(data) } + +func formatString(attr slog.Attr) string { + return strings.ReplaceAll(attr.Value.String(), "\n", "\\n") +} diff --git a/internal/log/log_test.go 
b/internal/log/log_test.go new file mode 100644 index 00000000..cfdb7be4 --- /dev/null +++ b/internal/log/log_test.go @@ -0,0 +1,48 @@ +package log_test + +import ( + "log/slog" + "testing" + + "github.com/cloudflare/pint/internal/log" + + "github.com/stretchr/testify/require" +) + +func TestParseLevel(t *testing.T) { + type testCaseT struct { + s string + level slog.Level + err string + } + + testCases := []testCaseT{ + {s: "xxx", level: slog.LevelInfo, err: `"xxx" is not a valid log level`}, + {s: "err", level: slog.LevelInfo, err: `"err" is not a valid log level`}, + {s: "DEB", level: slog.LevelInfo, err: `"DEB" is not a valid log level`}, + {s: "error", level: slog.LevelError}, + {s: "Error", level: slog.LevelError}, + {s: "ERROR", level: slog.LevelError}, + {s: "warn", level: slog.LevelWarn}, + {s: "Warn", level: slog.LevelWarn}, + {s: "WARN", level: slog.LevelWarn}, + {s: "info", level: slog.LevelInfo}, + {s: "Info", level: slog.LevelInfo}, + {s: "INFO", level: slog.LevelInfo}, + {s: "debug", level: slog.LevelDebug}, + {s: "Debug", level: slog.LevelDebug}, + {s: "DEBUG", level: slog.LevelDebug}, + } + + for _, tc := range testCases { + t.Run(tc.s, func(t *testing.T) { + l, err := log.ParseLevel(tc.s) + if tc.err != "" { + require.EqualError(t, err, tc.err) + } else { + require.NoError(t, err) + require.Equal(t, tc.level, l) + } + }) + } +} diff --git a/internal/promapi/config_test.go b/internal/promapi/config_test.go index bacb56de..41dbf2f0 100644 --- a/internal/promapi/config_test.go +++ b/internal/promapi/config_test.go @@ -10,6 +10,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "github.com/cloudflare/pint/internal/promapi" @@ -179,8 +180,9 @@ func TestConfigHeaders(t *testing.T) { promapi.NewPrometheus("test", srv.URL, tc.config, time.Second, 1, 100, nil), }, true, "up", nil, nil, nil) - fg.StartWorkers() - defer fg.Close() + reg := prometheus.NewRegistry() + fg.StartWorkers(reg) + 
defer fg.Close(reg) _, err := fg.Config(context.Background()) require.NoError(t, err) diff --git a/internal/promapi/failover.go b/internal/promapi/failover.go index 0050e853..52bd48b3 100644 --- a/internal/promapi/failover.go +++ b/internal/promapi/failover.go @@ -2,6 +2,7 @@ package promapi import ( "context" + "log/slog" "regexp" "time" @@ -45,7 +46,6 @@ func cacheCleaner(cache *queryCache, interval time.Duration, quit chan bool) { type FailoverGroup struct { name string servers []*Prometheus - strictErrors bool uptimeMetric string cacheCollector *cacheCollector quitChan chan bool @@ -53,6 +53,8 @@ type FailoverGroup struct { pathsInclude []*regexp.Regexp pathsExclude []*regexp.Regexp tags []string + started bool + strictErrors bool } func NewFailoverGroup(name string, servers []*Prometheus, strictErrors bool, uptimeMetric string, include, exclude []*regexp.Regexp, tags []string) *FailoverGroup { @@ -79,6 +81,30 @@ func (fg *FailoverGroup) UptimeMetric() string { return fg.uptimeMetric } +func (fg *FailoverGroup) ServerCount() int { + return len(fg.servers) +} + +func (fg *FailoverGroup) MergeUpstreams(src *FailoverGroup) { + for _, ns := range src.servers { + var present bool + for _, ol := range fg.servers { + if ol.unsafeURI == ns.unsafeURI { + present = true + break + } + } + if !present { + fg.servers = append(fg.servers, ns) + slog.Debug( + "Added new failover URI", + slog.String("name", ns.name), + slog.String("uri", ns.safeURI), + ) + } + } +} + func (fg *FailoverGroup) IsEnabledForPath(path string) bool { if len(fg.pathsInclude) == 0 && len(fg.pathsExclude) == 0 { return true @@ -96,24 +122,32 @@ func (fg *FailoverGroup) IsEnabledForPath(path string) bool { return false } -func (fg *FailoverGroup) StartWorkers() { +func (fg *FailoverGroup) StartWorkers(reg *prometheus.Registry) { + if fg.started { + return + } + queryCache := newQueryCache(time.Hour) fg.quitChan = make(chan bool) go cacheCleaner(queryCache, time.Minute*2, fg.quitChan) fg.cacheCollector = 
newCacheCollector(queryCache, fg.name) - prometheus.MustRegister(fg.cacheCollector) + reg.MustRegister(fg.cacheCollector) for _, prom := range fg.servers { prom.cache = queryCache prom.StartWorkers() } + fg.started = true } -func (fg *FailoverGroup) Close() { +func (fg *FailoverGroup) Close(reg *prometheus.Registry) { + if !fg.started { + return + } for _, prom := range fg.servers { prom.Close() } - prometheus.Unregister(fg.cacheCollector) + reg.Unregister(fg.cacheCollector) fg.quitChan <- true } diff --git a/internal/promapi/flags_test.go b/internal/promapi/flags_test.go index 2e94d8d4..5a1e6723 100644 --- a/internal/promapi/flags_test.go +++ b/internal/promapi/flags_test.go @@ -9,6 +9,7 @@ import ( "time" v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "github.com/cloudflare/pint/internal/promapi" @@ -96,8 +97,9 @@ func TestFlags(t *testing.T) { promapi.NewPrometheus("test", srv.URL+tc.prefix, nil, tc.timeout, 1, 100, nil), }, true, "up", nil, nil, nil) - fg.StartWorkers() - defer fg.Close() + reg := prometheus.NewRegistry() + fg.StartWorkers(reg) + defer fg.Close(reg) flags, err := fg.Flags(context.Background()) if tc.err != "" { diff --git a/internal/promapi/metadata_test.go b/internal/promapi/metadata_test.go index 90d032bd..60679830 100644 --- a/internal/promapi/metadata_test.go +++ b/internal/promapi/metadata_test.go @@ -8,6 +8,7 @@ import ( "time" v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/require" "github.com/cloudflare/pint/internal/promapi" @@ -118,8 +119,9 @@ func TestMetadata(t *testing.T) { fg := promapi.NewFailoverGroup("test", []*promapi.Prometheus{ promapi.NewPrometheus("test", srv.URL, nil, tc.timeout, 1, 100, nil), }, true, "up", nil, nil, nil) - fg.StartWorkers() - defer fg.Close() + reg := prometheus.NewRegistry() + fg.StartWorkers(reg) + defer 
fg.Close(reg) metadata, err := fg.Metadata(context.Background(), tc.metric) if tc.err != "" { diff --git a/internal/promapi/metrics.go b/internal/promapi/metrics.go index 4c19c9db..d49028f4 100644 --- a/internal/promapi/metrics.go +++ b/internal/promapi/metrics.go @@ -32,10 +32,10 @@ var ( ) ) -func RegisterMetrics() { - prometheus.MustRegister(prometheusQueriesRunning) - prometheus.MustRegister(prometheusQueriesTotal) - prometheus.MustRegister(prometheusQueryErrorsTotal) +func RegisterMetrics(reg *prometheus.Registry) { + reg.MustRegister(prometheusQueriesRunning) + reg.MustRegister(prometheusQueriesTotal) + reg.MustRegister(prometheusQueryErrorsTotal) } func errReason(err error) string { diff --git a/internal/promapi/prometheus.go b/internal/promapi/prometheus.go index 4a61c661..8e692a50 100644 --- a/internal/promapi/prometheus.go +++ b/internal/promapi/prometheus.go @@ -115,9 +115,14 @@ func NewPrometheus(name, uri string, headers map[string]string, timeout time.Dur rateLimiter: ratelimit.New(rl), concurrency: concurrency, } + return &prom } +func (prom *Prometheus) SafeURI() string { + return prom.safeURI +} + func (prom *Prometheus) Close() { slog.Debug("Stopping query workers", slog.String("name", prom.name), slog.String("uri", prom.safeURI)) close(prom.queries) diff --git a/internal/promapi/query_test.go b/internal/promapi/query_test.go index 9462adf8..981f79ed 100644 --- a/internal/promapi/query_test.go +++ b/internal/promapi/query_test.go @@ -7,6 +7,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/require" @@ -248,8 +249,9 @@ func TestQuery(t *testing.T) { fg := promapi.NewFailoverGroup("test", []*promapi.Prometheus{ promapi.NewPrometheus("test", srv.URL, nil, tc.timeout, 1, 100, nil), }, true, "up", nil, nil, nil) - fg.StartWorkers() - defer fg.Close() + reg := prometheus.NewRegistry() + fg.StartWorkers(reg) + defer fg.Close(reg) qr, err := 
fg.Query(context.Background(), tc.query) if tc.err != "" { diff --git a/internal/promapi/range_test.go b/internal/promapi/range_test.go index 71fd870d..48d8ad00 100644 --- a/internal/promapi/range_test.go +++ b/internal/promapi/range_test.go @@ -10,6 +10,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/require" @@ -640,8 +641,9 @@ func TestRange(t *testing.T) { fg := promapi.NewFailoverGroup("test", []*promapi.Prometheus{ promapi.NewPrometheus("test", srv.URL, nil, tc.timeout, 1, 100, nil), }, true, "up", nil, nil, nil) - fg.StartWorkers() - defer fg.Close() + reg := prometheus.NewRegistry() + fg.StartWorkers(reg) + defer fg.Close(reg) for i := 1; i < 5; i++ { t.Run(tc.query, func(t *testing.T) {