From 63c8ddb5a10543f2428cdc1e57a0981a2d197ada Mon Sep 17 00:00:00 2001 From: Sebastian Widmer Date: Tue, 26 Sep 2023 15:57:37 +0200 Subject: [PATCH 1/5] Allow automatically mapping alert to teams --- class/defaults.yml | 9 ++ component/alert-routing-discovery.libsonnet | 148 ++++++++++++++++++ component/main.jsonnet | 6 +- .../ROOT/pages/references/parameters.adoc | 68 ++++++++ tests/capacity-alerts.yml | 68 ++++++++ .../10_alertmanager_config.yaml | 31 ++++ .../99_discovery_debug_cm.yaml | 119 ++++++++++++++ .../openshift4-monitoring/capacity_rules.yaml | 6 + .../prometheus_rules.yaml | 135 ++++++++++++++++ 9 files changed, 589 insertions(+), 1 deletion(-) create mode 100644 component/alert-routing-discovery.libsonnet create mode 100644 tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml diff --git a/class/defaults.yml b/class/defaults.yml index aea4484d..6c6e74cc 100644 --- a/class/defaults.yml +++ b/class/defaults.yml @@ -1,5 +1,7 @@ parameters: openshift4_monitoring: + team_fallback: null + =_metadata: library_aliases: prom.libsonnet: openshift4-monitoring-prom.libsonnet @@ -99,6 +101,13 @@ parameters: equal: - namespace - alertname + alertManagerAutoDiscovery: + enabled: true + debug_config_map: false + team_receiver_format: team_default_%s + additional_alert_matchers: [] + prepend_routes: [] + append_routes: [] alerts: includeNamespaces: - appuio.* diff --git a/component/alert-routing-discovery.libsonnet b/component/alert-routing-discovery.libsonnet new file mode 100644 index 00000000..f085154e --- /dev/null +++ b/component/alert-routing-discovery.libsonnet @@ -0,0 +1,148 @@ +local com = import 'lib/commodore.libjsonnet'; +local kap = import 'lib/kapitan.libjsonnet'; +local kube = import 'lib/kube.libjsonnet'; +local prom = import 'lib/prom.libsonnet'; + +local inv = kap.inventory(); +local params = inv.parameters; + +local adParams = params.openshift4_monitoring.alertManagerAutoDiscovery; + +local nullReceiver 
= '__component_openshift4_monitoring_null'; + +// appKeys returns the (aliased) application name and if aliased the original name in the second position. +// The application name is translated from kebab-case to snake_case, except if the second parameter is set to true. +local appKeys = function(name, raw=false) + local normalized = function(name) if raw then name else std.strReplace(name, '-', '_'); + // can be simplified with jsonnet > 0.19 which would support ' as ' as the substring + local parts = std.split(name, ' '); + if std.length(parts) == 1 then + [ normalized(parts[0]) ] + else if std.length(parts) == 3 && parts[1] == 'as' then + [ normalized(parts[2]), normalized(parts[0]) ] + else + error 'invalid application name `%s`' % name; + +// discoverNS returns the namespace for the given application, or null if none is found. +// It looks into the following places: +// - params[app].namespace +// - params[app].namespace.name +// It does respect aliased applications and looks in the instance first and then in the base application. +local discoverNS = function(app) + local f = function(k) + if std.objectHas(params, k) then + local p = params[k]; + if std.objectHas(p, 'namespace') then + if std.isString(p.namespace) then + p.namespace + else if std.isObject(p.namespace) && std.objectHas(p.namespace, 'name') && std.isString(p.namespace.name) then + p.namespace.name; + + local ks = appKeys(app); + local aliased = f(ks[0]); + if aliased != null then + aliased + else if std.length(ks) == 2 then + f(ks[1]); + + +local ownerOrFallbackTeam = + if std.objectHas(params, 'syn') && std.objectHas(params.syn, 'owner') then + params.syn.owner + else + params.openshift4_monitoring.team_fallback; + +// teamsForApplication returns the teams for the given application. +// It does so by looking at the top level syn parameter. +// The syn parameter should look roughly like this.
+// +// syn: +// owner: clumsy-donkeys +// teams: +// chubby-cockroaches: +// instances: +// - superb-visualization +// lovable-lizards: +// instances: +// - apartment-cats +// +// The application is first looked up in the instances of the teams (alias first, then base name); if no team is found, ownerOrFallbackTeam is used as fallback. +local teamsForApplication = function(app) + local lookup = function(app) + if std.objectHas(params, 'syn') && std.objectHas(params.syn, 'teams') then + local teams = params.syn.teams; + std.foldl( + function(prev, team) + if std.objectHas(teams, team) && std.objectHas(teams[team], 'instances') && std.member(teams[team].instances, app) then + prev + [ team ] + else + prev, + std.objectFields(teams), + [], + ); + + local teams = std.prune(std.map(lookup, appKeys(app, true))); + + if std.length(teams) > 0 then + teams[0] + else + [ ownerOrFallbackTeam ]; + +// teamToNS is a map from a team to the namespaces discovered for its applications (an entry is null when discoverNS finds no namespace). +local teamToNS = std.foldl( + function(prev, app) + local tms = teamsForApplication(app); + std.foldl( + function(prev, tm) prev { [tm]+: [ discoverNS(app) ] }, tms, prev + ) + , + inv.applications, + {} +); + +// teamBasedRouting contains discovered routes for teams. +// The routes are set up with `continue: true` so we can route to multiple teams. +// The last route catches all alerts already routed to a team.
+local teamBasedRouting = std.map( + function(k) { + receiver: adParams.team_receiver_format % k, + matchers: adParams.additional_alert_matchers + [ + 'namespace =~ "%s"' % std.join('|', teamToNS[k]), + ], + continue: true, + }, + std.objectFields(teamToNS) +) + [ { + // catch all alerts already routed to a team + receiver: nullReceiver, + matchers: adParams.additional_alert_matchers + [ + 'namespace =~ "%s"' % std.join('|', std.foldl(function(prev, nss) prev + nss, std.objectValues(teamToNS), [])), + ], + continue: false, +} ]; + +local alertmanagerConfig = + params.openshift4_monitoring.alertManagerConfig { + receivers+: [ { name: nullReceiver } ], + route+: { + routes: adParams.prepend_routes + teamBasedRouting + adParams.append_routes + (if 'routes' in super then super.routes else []) + if ownerOrFallbackTeam != null then [ { + receiver: adParams.team_receiver_format % ownerOrFallbackTeam, + } ] else [ { receiver: nullReceiver } ], + }, + }; + +{ + debugConfigMap: kube.ConfigMap('discovery-debug') { + data: { + local discoveredNamespaces = std.foldl(function(prev, app) prev { [app]: discoverNS(app) }, inv.applications, {}), + local discoveredTeams = std.foldl(function(prev, app) prev { [app]: teamsForApplication(app) }, inv.applications, {}), + applications: std.manifestJsonMinified(inv.applications), + discovered_namespaces: std.manifestYamlDoc(discoveredNamespaces), + apps_without_namespaces: std.manifestYamlDoc(std.foldl(function(prev, app) if discoveredNamespaces[app] == null then prev + [ app ] else prev, std.objectFields(discoveredNamespaces), [])), + discovered_teams: std.manifestYamlDoc(discoveredTeams), + proposed_routes: std.manifestYamlDoc(teamBasedRouting), + alertmanager: std.manifestYamlDoc(alertmanagerConfig), + }, + }, + alertmanagerConfig: alertmanagerConfig, +} diff --git a/component/main.jsonnet b/component/main.jsonnet index 506e8f00..572bfb39 100644 --- a/component/main.jsonnet +++ b/component/main.jsonnet @@ -11,6 +11,8 @@ local params = inv.parameters.openshift4_monitoring;
local rules = import 'rules.jsonnet'; local capacity = import 'capacity.libsonnet'; +local alertDiscovery = import 'alert-routing-discovery.libsonnet'; + local ns = if params.namespace != 'openshift-monitoring' then error 'Component openshift4-monitoring does not support values for parameter `namespace` other than "openshift-monitoring".' @@ -105,9 +107,11 @@ local customRules = namespace: ns, }, stringData: { - 'alertmanager.yaml': std.manifestYamlDoc(params.alertManagerConfig), + 'alertmanager.yaml': std.manifestYamlDoc(if params.alertManagerAutoDiscovery.enabled then alertDiscovery.alertmanagerConfig else params.alertManagerConfig), }, }, + [if params.alertManagerAutoDiscovery.enabled && params.alertManagerAutoDiscovery.debug_config_map then '99_discovery_debug_cm']: alertDiscovery.debugConfigMap, + [if params.enableAlertmanagerIsolationNetworkPolicy then '20_networkpolicy']: std.map(function(p) com.namespaced('openshift-monitoring', p), import 'networkpolicy.libsonnet'), [if params.enableUserWorkload && params.enableUserWorkloadAlertmanagerIsolationNetworkPolicy then '20_user_workload_networkpolicy']: std.map(function(p) com.namespaced('openshift-user-workload-monitoring', p), import 'networkpolicy.libsonnet'), rbac: import 'rbac.libsonnet', diff --git a/docs/modules/ROOT/pages/references/parameters.adoc b/docs/modules/ROOT/pages/references/parameters.adoc index 0a399921..0497c334 100644 --- a/docs/modules/ROOT/pages/references/parameters.adoc +++ b/docs/modules/ROOT/pages/references/parameters.adoc @@ -268,6 +268,74 @@ A dictionary holding the configuration for the AlertManager. See the https://docs.openshift.com/container-platform/latest/monitoring/cluster_monitoring/configuring-the-monitoring-stack.html#configuring-alertmanager[OpenShift docs] for available parameters.
+ +== `alertManagerAutoDiscovery` + +[horizontal] +type:: dictionary +default:: ++ +[source,yaml] +---- +alertManagerAutoDiscovery: + enabled: true + debug_config_map: false + team_receiver_format: team_default_%s + additional_alert_matchers: [] + prepend_routes: [] + append_routes: [] +---- + +`alertManagerAutoDiscovery` holds the configuration for the Alertmanager auto-discovery feature. + +The auto-discovery routes alerts to the configured teams based on their namespaces and the top-level `syn.teams[*].instances` and `syn.owner` parameters. +It tries to find the namespaces associated with a Commodore application (called instance in the `syn` configuration) in the applications array, and matches the namespace to the team based on the `syn.teams[*].instances` parameter. + +.`syn` Team Example +[source,yaml] +---- +syn: + owner: daring-donkeys + teams: + electric-elephants: + instances: [postgres] +---- + +The auto-discovery feature is enabled by default. +A ConfigMap can be enabled with `debug_config_map` to debug the auto-discovery feature. + +The configuration is merged with the `alertManagerConfig` parameter. +Route receivers are generated for each team based on the `team_receiver_format` parameter. +The routes are ordered as follows: + +[source] +---- +alertManagerAutoDiscovery.prepend_routes + generated routes + alertManagerAutoDiscovery.append_routes + alertManagerConfig.routes + route all to syn.owner +---- + +`additional_alert_matchers` is a list of additional alert matchers to add to the generated routes. +This can be used to handle special cases where the auto-discovery feature does not work as expected. +For example if an alert should go to a different team than the namespace suggests based on a label. 
+ +[source,yaml] +---- +alertManagerAutoDiscovery: + additional_alert_matchers: + - 'syn_team = ""' +# becomes +- continue: true + matchers: + - syn_team = "" + - namespace =~ "my-ns" + receiver: team_default_lovable-lizards +- continue: false + matchers: + - syn_team = "" + - namespace =~ "my-ns" + receiver: __component_openshift4_monitoring_null +---- + + == `alerts` [horizontal] diff --git a/tests/capacity-alerts.yml b/tests/capacity-alerts.yml index ec861084..a17564df 100644 --- a/tests/capacity-alerts.yml +++ b/tests/capacity-alerts.yml @@ -1,3 +1,15 @@ +applications: + - non-existing + - no-ns + - ns-string + - ns-object + - base as ns-in-base + - base as ns-overridden + - non-existing as still-non-existing + - shared-responsibility + - same-ns-1 + - same-ns-2 + parameters: kapitan: dependencies: @@ -13,3 +25,59 @@ parameters: openshift4_monitoring: capacityAlerts: enabled: true + alertManagerConfig: + route: + routes: + - receiver: other + matchers: + - other = "true" + alertManagerAutoDiscovery: + debug_config_map: true + team_receiver_format: team_default_%s + additional_alert_matchers: + - 'syn_team = ""' + prepend_routes: + - matchers: + - alertname = Watchdog + repeat_interval: 60s + receiver: heartbeat + + syn: + owner: clumsy-donkeys + teams: + chubby-cockroaches: + instances: + - ns-in-base + - ns-overridden + - shared-responsibility + lovable-lizards: + instances: + - ns-object + - shared-responsibility + - same-ns-2 + + no_ns: {} + + ns_string: + namespace: "ns-string" + + ns_object: + namespace: + name: "ns-object" + + base: + namespace: base + + ns_in_base: {} + + ns_overridden: + namespace: overridden + + shared_responsibility: + namespace: shared-responsibility + + same_ns_1: + namespace: same-ns + + same_ns_2: + namespace: same-ns diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml 
index 1d884f15..4c71f782 100644 --- a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -24,8 +24,39 @@ stringData: "severity": "warning" "target_match_re": "severity": "info" + "receivers": + - "name": "__component_openshift4_monitoring_null" "route": "group_interval": "5s" "group_wait": "0s" "repeat_interval": "10m" + "routes": + - "matchers": + - "alertname = Watchdog" + "receiver": "heartbeat" + "repeat_interval": "60s" + - "continue": true + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"base|overridden|shared-responsibility\"" + "receiver": "team_default_chubby-cockroaches" + - "continue": true + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"ns-string|same-ns\"" + "receiver": "team_default_clumsy-donkeys" + - "continue": true + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"ns-object|shared-responsibility|same-ns\"" + "receiver": "team_default_lovable-lizards" + - "continue": false + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"base|overridden|shared-responsibility|ns-string|same-ns|ns-object|shared-responsibility|same-ns\"" + "receiver": "__component_openshift4_monitoring_null" + - "matchers": + - "other = \"true\"" + "receiver": "other" + - "receiver": "team_default_clumsy-donkeys" type: Opaque diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml new file mode 100644 index 00000000..538e610e --- /dev/null +++ b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml @@ -0,0 +1,119 @@ +apiVersion: v1 +data: + alertmanager: |- + "inhibit_rules": + - "equal": + - "namespace" + - "alertname" + "source_match": + "severity": "critical" + "target_match_re": + "severity": 
"warning|info" + - "equal": + - "namespace" + - "alertname" + "source_match": + "severity": "warning" + "target_match_re": + "severity": "info" + "receivers": + - "name": "__component_openshift4_monitoring_null" + "route": + "group_interval": "5s" + "group_wait": "0s" + "repeat_interval": "10m" + "routes": + - "matchers": + - "alertname = Watchdog" + "receiver": "heartbeat" + "repeat_interval": "60s" + - "continue": true + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"base|overridden|shared-responsibility\"" + "receiver": "team_default_chubby-cockroaches" + - "continue": true + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"ns-string|same-ns\"" + "receiver": "team_default_clumsy-donkeys" + - "continue": true + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"ns-object|shared-responsibility|same-ns\"" + "receiver": "team_default_lovable-lizards" + - "continue": false + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"base|overridden|shared-responsibility|ns-string|same-ns|ns-object|shared-responsibility|same-ns\"" + "receiver": "__component_openshift4_monitoring_null" + - "matchers": + - "other = \"true\"" + "receiver": "other" + - "receiver": "team_default_clumsy-donkeys" + applications: '["non-existing","no-ns","ns-string","ns-object","base as ns-in-base","base + as ns-overridden","non-existing as still-non-existing","shared-responsibility","same-ns-1","same-ns-2"]' + apps_without_namespaces: |- + - "no-ns" + - "non-existing" + - "non-existing as still-non-existing" + discovered_namespaces: |- + "base as ns-in-base": "base" + "base as ns-overridden": "overridden" + "no-ns": null + "non-existing": null + "non-existing as still-non-existing": null + "ns-object": "ns-object" + "ns-string": "ns-string" + "same-ns-1": "same-ns" + "same-ns-2": "same-ns" + "shared-responsibility": "shared-responsibility" + discovered_teams: |- + "base as ns-in-base": + - "chubby-cockroaches" + "base as ns-overridden": + - "chubby-cockroaches" + "no-ns": + - 
"clumsy-donkeys" + "non-existing": + - "clumsy-donkeys" + "non-existing as still-non-existing": + - "clumsy-donkeys" + "ns-object": + - "lovable-lizards" + "ns-string": + - "clumsy-donkeys" + "same-ns-1": + - "clumsy-donkeys" + "same-ns-2": + - "lovable-lizards" + "shared-responsibility": + - "chubby-cockroaches" + - "lovable-lizards" + proposed_routes: |- + - "continue": true + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"base|overridden|shared-responsibility\"" + "receiver": "team_default_chubby-cockroaches" + - "continue": true + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"ns-string|same-ns\"" + "receiver": "team_default_clumsy-donkeys" + - "continue": true + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"ns-object|shared-responsibility|same-ns\"" + "receiver": "team_default_lovable-lizards" + - "continue": false + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"base|overridden|shared-responsibility|ns-string|same-ns|ns-object|shared-responsibility|same-ns\"" + "receiver": "__component_openshift4_monitoring_null" +kind: ConfigMap +metadata: + annotations: {} + labels: + name: discovery-debug + name: discovery-debug diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml index a430c4b2..b4c8d9bf 100644 --- a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml +++ b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml @@ -28,6 +28,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-MemoryCapacity rules: - alert: SYN_ClusterLowOnMemory @@ -47,6 +48,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-PodCapacity rules: - alert: SYN_TooManyPods @@ -66,6 +68,7 @@ spec: severity: warning 
syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-ResourceRequests rules: - alert: SYN_TooMuchCPURequested @@ -85,6 +88,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_TooMuchMemoryRequested annotations: description: The cluster is close to assigning all memory to running pods. @@ -102,6 +106,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-UnusedCapacity rules: - alert: SYN_ClusterHasUnusedNodes @@ -139,3 +144,4 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index d302f37b..7eaf543d 100644 --- a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -33,6 +33,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_AlertmanagerClusterFailedToSendAlerts annotations: description: The minimum notification failure rate to {{ $labels.integration @@ -54,6 +55,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_AlertmanagerConfigInconsistent annotations: description: Alertmanager instances within the {{$labels.job}} cluster @@ -71,6 +73,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_AlertmanagerFailedReload annotations: description: Configuration has failed to load for {{ $labels.namespace @@ -87,6 +90,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys 
- alert: SYN_AlertmanagerFailedToSendAlerts annotations: description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed @@ -107,6 +111,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_AlertmanagerMembersInconsistent annotations: description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has @@ -125,6 +130,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-cluster-machine-approver.rules rules: - alert: SYN_MachineApproverMaxPendingCSRsReached @@ -144,6 +150,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-cluster-network-operator-sdn.rules rules: - alert: SYN_NodeProxyApplySlow @@ -160,6 +167,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeProxyApplyStale annotations: description: Stale proxy rules for Kubernetes services may increase the @@ -176,6 +184,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeWithoutSDNController annotations: description: | @@ -191,6 +200,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeWithoutSDNPod annotations: description: Network control plane configuration on the node could be @@ -205,6 +215,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_SDNPodNotReady annotations: description: Network control plane configuration on the node could be @@ -219,6 +230,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-cluster-operators rules: - alert: SYN_ClusterOperatorDegraded @@ -246,6 +258,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: 
clumsy-donkeys - alert: SYN_ClusterOperatorDown annotations: description: The {{ $labels.name }} operator may be down or disabled because @@ -264,6 +277,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_ClusterOperatorFlapping annotations: description: The {{ $labels.name }} operator behavior might cause upgrades @@ -280,6 +294,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-cluster-version rules: - alert: SYN_ClusterReleaseNotAccepted @@ -301,6 +316,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_ClusterVersionOperatorDown annotations: description: The operator may be down or disabled. The cluster will not @@ -321,6 +337,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeControllerManagerDown annotations: description: KubeControllerManager has disappeared from Prometheus target @@ -336,6 +353,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeSchedulerDown annotations: description: KubeScheduler has disappeared from Prometheus target discovery. 
@@ -349,6 +367,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PodDisruptionBudgetLimit annotations: description: The pod disruption budget is below the minimum disruptions @@ -364,6 +383,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-control-plane-cpu-utilization rules: - alert: SYN_ExtremelyHighIndividualControlPlaneCPU @@ -389,6 +409,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_HighOverallControlPlaneCPU annotations: description: Given three control plane nodes, the overall CPU utilization @@ -417,6 +438,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-etcd rules: - alert: SYN_etcdDatabaseQuotaLowSpace @@ -435,6 +457,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_etcdHighFsyncDurations annotations: description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync @@ -450,6 +473,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_etcdHighNumberOfFailedProposals annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal @@ -464,6 +488,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_etcdMembersDown annotations: description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value @@ -485,6 +510,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_etcdNoLeader annotations: description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance @@ -499,6 +525,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: 
syn-extremely-high-individual-control-plane-memory rules: - alert: SYN_ExtremelyHighIndividualControlPlaneMemory @@ -536,6 +563,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-general.rules rules: - alert: Watchdog @@ -555,6 +583,7 @@ spec: severity: none syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-high-overall-control-plane-memory rules: - alert: SYN_HighOverallControlPlaneMemory @@ -594,6 +623,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-kube-state-metrics rules: - alert: SYN_KubeStateMetricsWatchErrors @@ -613,6 +643,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-kubernetes-apps rules: - alert: SYN_KubeContainerWaiting @@ -629,6 +660,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeDaemonSetMisScheduled annotations: description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ @@ -642,6 +674,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeDaemonSetNotScheduled annotations: description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ @@ -657,6 +690,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeDaemonSetRolloutStuck annotations: description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} @@ -692,6 +726,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeDeploymentGenerationMismatch annotations: description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment @@ -708,6 +743,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: 
clumsy-donkeys - alert: SYN_KubeDeploymentRolloutStuck annotations: description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment @@ -722,6 +758,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeJobFailed annotations: description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed @@ -737,6 +774,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeJobNotCompleted annotations: description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking @@ -751,6 +789,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubePodCrashLooping annotations: description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container @@ -764,6 +803,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubePodNotReady annotations: description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in @@ -785,6 +825,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeStatefulSetGenerationMismatch annotations: description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset @@ -801,6 +842,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeStatefulSetReplicasMismatch annotations: description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset @@ -823,6 +865,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeStatefulSetUpdateNotRolledOut annotations: description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset @@ -852,6 +895,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: 
syn-kubernetes-storage rules: - alert: SYN_KubePersistentVolumeErrors @@ -867,6 +911,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim @@ -892,6 +937,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubePersistentVolumeFillingUp annotations: description: Based on recent sampling, the PersistentVolume claimed by @@ -920,6 +966,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim @@ -945,6 +992,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: Based on recent sampling, the PersistentVolume claimed by @@ -973,6 +1021,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-kubernetes-system rules: - alert: SYN_KubeClientErrors @@ -991,6 +1040,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-kubernetes-system-apiserver rules: - alert: SYN_KubeAPIDown @@ -1006,6 +1056,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeAPITerminatedRequests annotations: description: The kubernetes apiserver has terminated {{ $value | humanizePercentage @@ -1020,6 +1071,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeAggregatedAPIDown annotations: description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace @@ -1033,6 +1085,7 @@ 
spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeAggregatedAPIErrors annotations: description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace @@ -1046,6 +1099,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-kubernetes-system-kubelet rules: - alert: SYN_KubeNodeNotReady @@ -1061,6 +1115,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeNodeReadinessFlapping annotations: description: The readiness status of node {{ $labels.node }} has changed @@ -1074,6 +1129,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeNodeUnreachable annotations: description: '{{ $labels.node }} is unreachable and some workloads may @@ -1087,6 +1143,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeletClientCertificateRenewalErrors annotations: description: Kubelet on node {{ $labels.node }} has failed to renew its @@ -1100,6 +1157,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeletDown annotations: description: Kubelet has disappeared from Prometheus target discovery. 
@@ -1114,6 +1172,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeletPlegDurationHigh annotations: description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile @@ -1127,6 +1186,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeletPodStartUpLatencyHigh annotations: description: Kubelet Pod startup 99th percentile latency is {{ $value @@ -1140,6 +1200,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeletServerCertificateRenewalErrors annotations: description: Kubelet on node {{ $labels.node }} has failed to renew its @@ -1153,6 +1214,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-machine-api-operator-metrics-collector-up rules: - alert: SYN_MachineAPIOperatorMetricsCollectionFailing @@ -1168,6 +1230,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-machine-health-check-unterminated-short-circuit rules: - alert: SYN_MachineHealthCheckUnterminatedShortCircuit @@ -1185,6 +1248,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-machine-not-yet-deleted rules: - alert: SYN_MachineNotYetDeleted @@ -1203,6 +1267,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-machine-with-no-running-phase rules: - alert: SYN_MachineWithNoRunningPhase @@ -1220,6 +1285,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-machine-without-valid-node-ref rules: - alert: SYN_MachineWithoutValidNode @@ -1236,6 +1302,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: 
syn-mcc-drain-error rules: - alert: SYN_MCCDrainError @@ -1252,6 +1319,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-mcd-kubelet-health-state-error rules: - alert: SYN_KubeletHealthState @@ -1265,6 +1333,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-mcd-pivot-error rules: - alert: SYN_MCDPivotError @@ -1281,6 +1350,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-mcd-reboot-error rules: - alert: SYN_MCDRebootError @@ -1297,6 +1367,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-node-exporter rules: - alert: SYN_NodeClockNotSynchronising @@ -1315,6 +1386,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeClockSkewDetected annotations: description: Clock at {{ $labels.instance }} is out of sync by more than @@ -1338,6 +1410,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeDiskIOSaturation annotations: description: | @@ -1352,6 +1425,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeFileDescriptorLimit annotations: description: File descriptors limit at {{ $labels.instance }} is currently @@ -1368,6 +1442,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemAlmostOutOfFiles annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1387,6 +1462,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemAlmostOutOfSpace annotations: description: Filesystem on {{ $labels.device }}, mounted on 
{{ $labels.mountpoint @@ -1406,6 +1482,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemFilesFillingUp annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1428,6 +1505,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemSpaceFillingUp annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1450,6 +1528,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemSpaceFillingUp annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1472,6 +1551,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeHighNumberConntrackEntriesUsed annotations: description: '{{ $value | humanizePercentage }} of conntrack entries are @@ -1484,6 +1564,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeMemoryHighUtilization annotations: description: | @@ -1497,6 +1578,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeMemoryMajorPagesFaults annotations: description: | @@ -1511,6 +1593,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeNetworkReceiveErrs annotations: description: '{{ $labels.instance }} interface {{ $labels.device }} has @@ -1525,6 +1608,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeNetworkTransmitErrs annotations: description: '{{ $labels.instance }} interface {{ $labels.device }} has @@ -1539,6 +1623,7 @@ spec: severity: warning syn: 'true' syn_component: 
openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_NodeSystemdServiceFailed annotations: description: Systemd service {{ $labels.name }} has entered failed state @@ -1552,6 +1637,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-node-network rules: - alert: SYN_NodeNetworkInterfaceFlapping @@ -1568,6 +1654,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-node-utilization rules: - alert: SYN_node_memory_free_percent @@ -1582,6 +1669,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-openshift-etcd.rules rules: - alert: SYN_etcdGRPCRequestsSlow @@ -1600,6 +1688,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_etcdHighNumberOfFailedGRPCRequests annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests @@ -1618,6 +1707,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_etcdHighNumberOfLeaderChanges annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }} average leader @@ -1633,6 +1723,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_etcdInsufficientMembers annotations: description: etcd is reporting fewer instances are available than are @@ -1654,6 +1745,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-openshift-general.rules rules: - alert: SYN_TargetDown @@ -1680,6 +1772,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-openshift-ingress-to-route-controller.rules rules: - alert: SYN_UnmanagedRoutes @@ -1696,6 +1789,7 @@ spec: severity: warning syn: 'true' syn_component: 
openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-openshift-ingress.rules rules: - alert: SYN_HAProxyDown @@ -1711,6 +1805,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_HAProxyReloadFail annotations: description: This alert fires when HAProxy fails to reload its configuration, @@ -1726,6 +1821,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_IngressControllerDegraded annotations: description: This alert fires when the IngressController status is degraded. @@ -1740,6 +1836,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_IngressControllerUnavailable annotations: description: This alert fires when the IngressController is not available. @@ -1754,6 +1851,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-openshift-kubernetes.rules rules: - alert: SYN_ClusterMonitoringOperatorReconciliationErrors @@ -1770,6 +1868,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubeDeploymentReplicasMismatch annotations: description: Deployment {{ $labels.namespace }}/{{ $labels.deployment @@ -1799,6 +1898,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_KubePodNotScheduled annotations: description: |- @@ -1814,6 +1914,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-prometheus rules: - alert: SYN_PrometheusBadConfig @@ -1831,6 +1932,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusDuplicateTimestamps annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping @@ -1845,6 +1947,7 @@ spec: severity: 
warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusErrorSendingAlertsToSomeAlertmanagers annotations: description: '{{ printf "%.1f" $value }}% errors while sending alerts @@ -1866,6 +1969,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusHighQueryLoad annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} query API @@ -1881,6 +1985,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusLabelLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped @@ -1896,6 +2001,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusMissingRuleEvaluations annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed @@ -1910,6 +2016,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusNotConnectedToAlertmanagers annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected @@ -1925,6 +2032,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusNotIngestingSamples annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting @@ -1946,6 +2054,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusNotificationQueueRunningFull annotations: description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} @@ -1966,6 +2075,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusOutOfOrderTimestamps annotations: description: Prometheus 
{{$labels.namespace}}/{{$labels.pod}} is dropping @@ -1980,6 +2090,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusRemoteStorageFailures annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to @@ -2004,6 +2115,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusRemoteWriteDesiredShards annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write @@ -2028,6 +2140,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusRuleFailures annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed @@ -2042,6 +2155,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusSDRefreshFailure annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed @@ -2055,6 +2169,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusScrapeBodySizeLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed @@ -2069,6 +2184,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusScrapeSampleLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed @@ -2084,6 +2200,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusTSDBCompactionsFailing annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected @@ -2097,6 +2214,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusTSDBReloadsFailing annotations: 
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected @@ -2110,6 +2228,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusTargetLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped @@ -2125,6 +2244,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusTargetSyncFailure annotations: description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} @@ -2139,6 +2259,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-prometheus-operator rules: - alert: SYN_PrometheusOperatorNotReady @@ -2154,6 +2275,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorReconcileErrors annotations: description: '{{ $value | humanizePercentage }} of reconciling operations @@ -2168,6 +2290,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorRejectedResources annotations: description: Prometheus operator in {{ $labels.namespace }} namespace @@ -2182,6 +2305,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace @@ -2195,6 +2319,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorWatchErrors annotations: description: Errors while performing watch operations in controller {{$labels.controller}} @@ -2208,6 +2333,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-system-memory-exceeds-reservation 
rules: - alert: SYN_SystemMemoryExceedsReservation @@ -2229,6 +2355,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-thanos-query rules: - alert: SYN_ThanosQueryHttpRequestQueryErrorRateHigh @@ -2248,6 +2375,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_ThanosQueryHttpRequestQueryRangeErrorRateHigh annotations: description: Thanos Query {{$labels.job}} in {{$labels.namespace}} is @@ -2265,6 +2393,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_ThanosQueryOverload annotations: description: Thanos Query {{$labels.job}} in {{$labels.namespace}} has @@ -2285,6 +2414,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - name: syn-thanos-rule rules: - alert: SYN_ThanosNoRuleEvaluations @@ -2302,6 +2432,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_ThanosRuleGrpcErrorRate annotations: description: Thanos Rule {{$labels.job}} in {{$labels.namespace}} is failing @@ -2320,6 +2451,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_ThanosRuleHighRuleEvaluationFailures annotations: description: Thanos Rule {{$labels.instance}} in {{$labels.namespace}} @@ -2338,6 +2470,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_ThanosRuleQueueIsDroppingAlerts annotations: description: Thanos Rule {{$labels.instance}} in {{$labels.namespace}} @@ -2352,6 +2485,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_ThanosRuleSenderIsFailingAlerts annotations: description: Thanos Rule {{$labels.instance}} in {{$labels.namespace}} @@ -2365,3 +2499,4 @@ spec: severity: 
warning syn: 'true' syn_component: openshift4-monitoring + syn_team: clumsy-donkeys From 3192bf3733f5ad2c9eb15ecd3d5766b8256d336f Mon Sep 17 00:00:00 2001 From: Sebastian Widmer Date: Mon, 2 Oct 2023 15:56:59 +0200 Subject: [PATCH 2/5] Update from template Template version: main (43e241e) --- .cruft.json | 4 ++-- .github/workflows/test.yaml | 2 ++ Makefile.vars.mk | 2 +- .../openshift4-monitoring/apps/openshift4-monitoring.yaml | 0 tests/team-routing.yml | 3 +++ 5 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 tests/golden/team-routing/openshift4-monitoring/apps/openshift4-monitoring.yaml create mode 100644 tests/team-routing.yml diff --git a/.cruft.json b/.cruft.json index 5583e1d6..eaa2f6e3 100644 --- a/.cruft.json +++ b/.cruft.json @@ -1,13 +1,13 @@ { "template": "https://github.com/projectsyn/commodore-component-template.git", - "commit": "d8afca0d957d69b362c2cb45e3f6faa13662dfe2", + "commit": "43e241e3b7daa4ad42a57889bb313e71098bf1dc", "checkout": "main", "context": { "cookiecutter": { "name": "OpenShift4 Monitoring", "slug": "openshift4-monitoring", "parameter_key": "openshift4_monitoring", - "test_cases": "capacity-alerts release-4.11 remote-write user-workload-monitoring capacity-alerts-with-node-labels vsphere team-label custom-rules release-4.12 release-4.13", + "test_cases": "capacity-alerts release-4.11 remote-write user-workload-monitoring capacity-alerts-with-node-labels vsphere team-label custom-rules release-4.12 release-4.13 team-routing", "add_lib": "y", "add_pp": "n", "add_golden": "y", diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index c0408bf4..203c0e40 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -42,6 +42,7 @@ jobs: - custom-rules - release-4.12 - release-4.13 + - team-routing defaults: run: working-directory: ${{ env.COMPONENT_NAME }} @@ -66,6 +67,7 @@ jobs: - custom-rules - release-4.12 - release-4.13 + - team-routing defaults: run: working-directory: ${{ 
env.COMPONENT_NAME }} diff --git a/Makefile.vars.mk b/Makefile.vars.mk index 0477d54a..fb4fea50 100644 --- a/Makefile.vars.mk +++ b/Makefile.vars.mk @@ -57,4 +57,4 @@ KUBENT_IMAGE ?= ghcr.io/doitintl/kube-no-trouble:latest KUBENT_DOCKER ?= $(DOCKER_CMD) $(DOCKER_ARGS) $(root_volume) --entrypoint=/app/kubent $(KUBENT_IMAGE) instance ?= capacity-alerts -test_instances = tests/capacity-alerts.yml tests/release-4.11.yml tests/remote-write.yml tests/user-workload-monitoring.yml tests/capacity-alerts-with-node-labels.yml tests/vsphere.yml tests/team-label.yml tests/custom-rules.yml tests/release-4.12.yml tests/release-4.13.yml +test_instances = tests/capacity-alerts.yml tests/release-4.11.yml tests/remote-write.yml tests/user-workload-monitoring.yml tests/capacity-alerts-with-node-labels.yml tests/vsphere.yml tests/team-label.yml tests/custom-rules.yml tests/release-4.12.yml tests/release-4.13.yml tests/team-routing.yml diff --git a/tests/golden/team-routing/openshift4-monitoring/apps/openshift4-monitoring.yaml b/tests/golden/team-routing/openshift4-monitoring/apps/openshift4-monitoring.yaml new file mode 100644 index 00000000..e69de29b diff --git a/tests/team-routing.yml b/tests/team-routing.yml new file mode 100644 index 00000000..a4da5b7b --- /dev/null +++ b/tests/team-routing.yml @@ -0,0 +1,3 @@ +# Overwrite parameters here + +# parameters: {...} From 2d16560dde494eeadebf4c4da147cb9e3f3e04c6 Mon Sep 17 00:00:00 2001 From: Sebastian Widmer Date: Mon, 2 Oct 2023 16:27:05 +0200 Subject: [PATCH 3/5] Move golden tests to correct place --- .cruft.json | 2 +- .github/workflows/test.yaml | 2 - Makefile.vars.mk | 2 +- class/defaults.yml | 2 +- component/alert-routing-discovery.libsonnet | 3 +- tests/capacity-alerts.yml | 68 ----- .../10_alertmanager_config.yaml | 8 + .../10_alertmanager_config.yaml | 27 +- .../openshift4-monitoring/capacity_rules.yaml | 6 - .../prometheus_rules.yaml | 135 --------- .../10_alertmanager_config.yaml | 8 + .../10_alertmanager_config.yaml | 8 + 
.../10_alertmanager_config.yaml | 8 + .../10_alertmanager_config.yaml | 8 + .../10_alertmanager_config.yaml | 8 + .../apps/openshift4-monitoring.yaml | 0 .../10_alertmanager_config.yaml | 31 -- .../00_namespace_labels.yaml | 0 .../openshift4-monitoring/01_secrets.yaml | 0 .../02_aggregated_clusterroles.yaml | 0 .../10_alertmanager_config.yaml | 62 ++++ .../openshift4-monitoring/10_configmap.yaml | 0 .../10_configmap_user_workload.yaml | 0 .../20_networkpolicy.yaml | 0 .../20_user_workload_networkpolicy.yaml | 0 .../99_discovery_debug_cm.yaml | 0 .../openshift4-monitoring/capacity_rules.yaml | 12 +- .../openshift4-monitoring/custom_rules.yaml | 2 +- .../prometheus_rules.yaml | 270 +++++++++--------- .../openshift4-monitoring/rbac.yaml | 0 .../openshift4-monitoring/silence.yaml | 0 .../10_alertmanager_config.yaml | 8 + .../10_alertmanager_config.yaml | 8 + tests/team-label.yml | 30 -- tests/team-routing.yml | 91 +++++- 35 files changed, 364 insertions(+), 445 deletions(-) delete mode 100644 tests/golden/team-label/openshift4-monitoring/apps/openshift4-monitoring.yaml delete mode 100644 tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml rename tests/golden/{team-label => team-routing}/openshift4-monitoring/openshift4-monitoring/00_namespace_labels.yaml (100%) rename tests/golden/{team-label => team-routing}/openshift4-monitoring/openshift4-monitoring/01_secrets.yaml (100%) rename tests/golden/{team-label => team-routing}/openshift4-monitoring/openshift4-monitoring/02_aggregated_clusterroles.yaml (100%) create mode 100644 tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml rename tests/golden/{team-label => team-routing}/openshift4-monitoring/openshift4-monitoring/10_configmap.yaml (100%) rename tests/golden/{team-label => team-routing}/openshift4-monitoring/openshift4-monitoring/10_configmap_user_workload.yaml (100%) rename tests/golden/{team-label => 
team-routing}/openshift4-monitoring/openshift4-monitoring/20_networkpolicy.yaml (100%) rename tests/golden/{team-label => team-routing}/openshift4-monitoring/openshift4-monitoring/20_user_workload_networkpolicy.yaml (100%) rename tests/golden/{capacity-alerts => team-routing}/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml (100%) rename tests/golden/{team-label => team-routing}/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml (97%) rename tests/golden/{team-label => team-routing}/openshift4-monitoring/openshift4-monitoring/custom_rules.yaml (93%) rename tests/golden/{team-label => team-routing}/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml (96%) rename tests/golden/{team-label => team-routing}/openshift4-monitoring/openshift4-monitoring/rbac.yaml (100%) rename tests/golden/{team-label => team-routing}/openshift4-monitoring/openshift4-monitoring/silence.yaml (100%) delete mode 100644 tests/team-label.yml diff --git a/.cruft.json b/.cruft.json index eaa2f6e3..fca8f025 100644 --- a/.cruft.json +++ b/.cruft.json @@ -7,7 +7,7 @@ "name": "OpenShift4 Monitoring", "slug": "openshift4-monitoring", "parameter_key": "openshift4_monitoring", - "test_cases": "capacity-alerts release-4.11 remote-write user-workload-monitoring capacity-alerts-with-node-labels vsphere team-label custom-rules release-4.12 release-4.13 team-routing", + "test_cases": "capacity-alerts release-4.11 remote-write user-workload-monitoring capacity-alerts-with-node-labels vsphere custom-rules release-4.12 release-4.13 team-routing", "add_lib": "y", "add_pp": "n", "add_golden": "y", diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 203c0e40..b3bc70f8 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -38,7 +38,6 @@ jobs: - user-workload-monitoring - capacity-alerts-with-node-labels - vsphere - - team-label - custom-rules - release-4.12 - release-4.13 @@ -63,7 +62,6 @@ jobs: - 
user-workload-monitoring - capacity-alerts-with-node-labels - vsphere - - team-label - custom-rules - release-4.12 - release-4.13 diff --git a/Makefile.vars.mk b/Makefile.vars.mk index fb4fea50..f3a102d1 100644 --- a/Makefile.vars.mk +++ b/Makefile.vars.mk @@ -57,4 +57,4 @@ KUBENT_IMAGE ?= ghcr.io/doitintl/kube-no-trouble:latest KUBENT_DOCKER ?= $(DOCKER_CMD) $(DOCKER_ARGS) $(root_volume) --entrypoint=/app/kubent $(KUBENT_IMAGE) instance ?= capacity-alerts -test_instances = tests/capacity-alerts.yml tests/release-4.11.yml tests/remote-write.yml tests/user-workload-monitoring.yml tests/capacity-alerts-with-node-labels.yml tests/vsphere.yml tests/team-label.yml tests/custom-rules.yml tests/release-4.12.yml tests/release-4.13.yml tests/team-routing.yml +test_instances = tests/capacity-alerts.yml tests/release-4.11.yml tests/remote-write.yml tests/user-workload-monitoring.yml tests/capacity-alerts-with-node-labels.yml tests/vsphere.yml tests/custom-rules.yml tests/release-4.12.yml tests/release-4.13.yml tests/team-routing.yml diff --git a/class/defaults.yml b/class/defaults.yml index 6c6e74cc..ca607f5b 100644 --- a/class/defaults.yml +++ b/class/defaults.yml @@ -1,6 +1,6 @@ parameters: openshift4_monitoring: - team_fallback: null + fallback_team: null =_metadata: library_aliases: diff --git a/component/alert-routing-discovery.libsonnet b/component/alert-routing-discovery.libsonnet index f085154e..b7752af9 100644 --- a/component/alert-routing-discovery.libsonnet +++ b/component/alert-routing-discovery.libsonnet @@ -122,10 +122,11 @@ local teamBasedRouting = std.map( } ]; local alertmanagerConfig = + local routes = std.get(params.openshift4_monitoring.alertManagerConfig.route, 'routes', []); params.openshift4_monitoring.alertManagerConfig { receivers+: [ { name: nullReceiver } ], route+: { - routes: adParams.prepend_routes + teamBasedRouting + adParams.append_routes + super.routes + if ownerOrFallbackTeam != null then [ { + routes: adParams.prepend_routes + 
teamBasedRouting + adParams.append_routes + routes + if ownerOrFallbackTeam != null then [ { receiver: adParams.team_receiver_format % ownerOrFallbackTeam, } ] else [ { receiver: nullReceiver } ], }, diff --git a/tests/capacity-alerts.yml b/tests/capacity-alerts.yml index a17564df..ec861084 100644 --- a/tests/capacity-alerts.yml +++ b/tests/capacity-alerts.yml @@ -1,15 +1,3 @@ -applications: - - non-existing - - no-ns - - ns-string - - ns-object - - base as ns-in-base - - base as ns-overridden - - non-existing as still-non-existing - - shared-responsibility - - same-ns-1 - - same-ns-2 - parameters: kapitan: dependencies: @@ -25,59 +13,3 @@ parameters: openshift4_monitoring: capacityAlerts: enabled: true - alertManagerConfig: - route: - routes: - - receiver: other - matchers: - - other = "true" - alertManagerAutoDiscovery: - debug_config_map: true - team_receiver_format: team_default_%s - additional_alert_matchers: - - 'syn_team = ""' - prepend_routes: - - matchers: - - alertname = Watchdog - repeat_interval: 60s - receiver: heartbeat - - syn: - owner: clumsy-donkeys - teams: - chubby-cockroaches: - instances: - - ns-in-base - - ns-overridden - - shared-responsibility - lovable-lizards: - instances: - - ns-object - - shared-responsibility - - same-ns-2 - - no_ns: {} - - ns_string: - namespace: "ns-string" - - ns_object: - namespace: - name: "ns-object" - - base: - namespace: base - - ns_in_base: {} - - ns_overridden: - namespace: overridden - - shared_responsibility: - namespace: shared-responsibility - - same_ns_1: - namespace: same-ns - - same_ns_2: - namespace: same-ns diff --git a/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml b/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml index 1d884f15..5035dc3d 100644 --- 
a/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ b/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -24,8 +24,16 @@ stringData: "severity": "warning" "target_match_re": "severity": "info" + "receivers": + - "name": "__component_openshift4_monitoring_null" "route": "group_interval": "5s" "group_wait": "0s" "repeat_interval": "10m" + "routes": + - "continue": false + "matchers": + - "namespace =~ \"\"" + "receiver": "__component_openshift4_monitoring_null" + - "receiver": "__component_openshift4_monitoring_null" type: Opaque diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml index 4c71f782..5035dc3d 100644 --- a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -31,32 +31,9 @@ stringData: "group_wait": "0s" "repeat_interval": "10m" "routes": - - "matchers": - - "alertname = Watchdog" - "receiver": "heartbeat" - "repeat_interval": "60s" - - "continue": true - "matchers": - - "syn_team = \"\"" - - "namespace =~ \"base|overridden|shared-responsibility\"" - "receiver": "team_default_chubby-cockroaches" - - "continue": true - "matchers": - - "syn_team = \"\"" - - "namespace =~ \"ns-string|same-ns\"" - "receiver": "team_default_clumsy-donkeys" - - "continue": true - "matchers": - - "syn_team = \"\"" - - "namespace =~ \"ns-object|shared-responsibility|same-ns\"" - "receiver": "team_default_lovable-lizards" - "continue": false "matchers": - - "syn_team = \"\"" - - "namespace =~ \"base|overridden|shared-responsibility|ns-string|same-ns|ns-object|shared-responsibility|same-ns\"" + - "namespace =~ \"\"" 
"receiver": "__component_openshift4_monitoring_null" - - "matchers": - - "other = \"true\"" - "receiver": "other" - - "receiver": "team_default_clumsy-donkeys" + - "receiver": "__component_openshift4_monitoring_null" type: Opaque diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml index b4c8d9bf..a430c4b2 100644 --- a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml +++ b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml @@ -28,7 +28,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-MemoryCapacity rules: - alert: SYN_ClusterLowOnMemory @@ -48,7 +47,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-PodCapacity rules: - alert: SYN_TooManyPods @@ -68,7 +66,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-ResourceRequests rules: - alert: SYN_TooMuchCPURequested @@ -88,7 +85,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_TooMuchMemoryRequested annotations: description: The cluster is close to assigning all memory to running pods. 
@@ -106,7 +102,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-UnusedCapacity rules: - alert: SYN_ClusterHasUnusedNodes @@ -144,4 +139,3 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 7eaf543d..d302f37b 100644 --- a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -33,7 +33,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_AlertmanagerClusterFailedToSendAlerts annotations: description: The minimum notification failure rate to {{ $labels.integration @@ -55,7 +54,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_AlertmanagerConfigInconsistent annotations: description: Alertmanager instances within the {{$labels.job}} cluster @@ -73,7 +71,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_AlertmanagerFailedReload annotations: description: Configuration has failed to load for {{ $labels.namespace @@ -90,7 +87,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_AlertmanagerFailedToSendAlerts annotations: description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed @@ -111,7 +107,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_AlertmanagerMembersInconsistent annotations: description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has @@ 
-130,7 +125,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-cluster-machine-approver.rules rules: - alert: SYN_MachineApproverMaxPendingCSRsReached @@ -150,7 +144,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-cluster-network-operator-sdn.rules rules: - alert: SYN_NodeProxyApplySlow @@ -167,7 +160,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeProxyApplyStale annotations: description: Stale proxy rules for Kubernetes services may increase the @@ -184,7 +176,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeWithoutSDNController annotations: description: | @@ -200,7 +191,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeWithoutSDNPod annotations: description: Network control plane configuration on the node could be @@ -215,7 +205,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_SDNPodNotReady annotations: description: Network control plane configuration on the node could be @@ -230,7 +219,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-cluster-operators rules: - alert: SYN_ClusterOperatorDegraded @@ -258,7 +246,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_ClusterOperatorDown annotations: description: The {{ $labels.name }} operator may be down or disabled because @@ -277,7 +264,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_ClusterOperatorFlapping annotations: description: The {{ $labels.name }} operator behavior might cause upgrades @@ 
-294,7 +280,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-cluster-version rules: - alert: SYN_ClusterReleaseNotAccepted @@ -316,7 +301,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_ClusterVersionOperatorDown annotations: description: The operator may be down or disabled. The cluster will not @@ -337,7 +321,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeControllerManagerDown annotations: description: KubeControllerManager has disappeared from Prometheus target @@ -353,7 +336,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeSchedulerDown annotations: description: KubeScheduler has disappeared from Prometheus target discovery. @@ -367,7 +349,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PodDisruptionBudgetLimit annotations: description: The pod disruption budget is below the minimum disruptions @@ -383,7 +364,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-control-plane-cpu-utilization rules: - alert: SYN_ExtremelyHighIndividualControlPlaneCPU @@ -409,7 +389,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_HighOverallControlPlaneCPU annotations: description: Given three control plane nodes, the overall CPU utilization @@ -438,7 +417,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-etcd rules: - alert: SYN_etcdDatabaseQuotaLowSpace @@ -457,7 +435,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_etcdHighFsyncDurations annotations: 
description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync @@ -473,7 +450,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_etcdHighNumberOfFailedProposals annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal @@ -488,7 +464,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_etcdMembersDown annotations: description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value @@ -510,7 +485,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_etcdNoLeader annotations: description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance @@ -525,7 +499,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-extremely-high-individual-control-plane-memory rules: - alert: SYN_ExtremelyHighIndividualControlPlaneMemory @@ -563,7 +536,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-general.rules rules: - alert: Watchdog @@ -583,7 +555,6 @@ spec: severity: none syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-high-overall-control-plane-memory rules: - alert: SYN_HighOverallControlPlaneMemory @@ -623,7 +594,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-kube-state-metrics rules: - alert: SYN_KubeStateMetricsWatchErrors @@ -643,7 +613,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-kubernetes-apps rules: - alert: SYN_KubeContainerWaiting @@ -660,7 +629,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeDaemonSetMisScheduled annotations: description: '{{ 
$value }} Pods of DaemonSet {{ $labels.namespace }}/{{ @@ -674,7 +642,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeDaemonSetNotScheduled annotations: description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ @@ -690,7 +657,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeDaemonSetRolloutStuck annotations: description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} @@ -726,7 +692,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeDeploymentGenerationMismatch annotations: description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment @@ -743,7 +708,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeDeploymentRolloutStuck annotations: description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment @@ -758,7 +722,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeJobFailed annotations: description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed @@ -774,7 +737,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeJobNotCompleted annotations: description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking @@ -789,7 +751,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubePodCrashLooping annotations: description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container @@ -803,7 +764,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubePodNotReady annotations: description: Pod {{ $labels.namespace }}/{{ $labels.pod }} 
has been in @@ -825,7 +785,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeStatefulSetGenerationMismatch annotations: description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset @@ -842,7 +801,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeStatefulSetReplicasMismatch annotations: description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset @@ -865,7 +823,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeStatefulSetUpdateNotRolledOut annotations: description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset @@ -895,7 +852,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-kubernetes-storage rules: - alert: SYN_KubePersistentVolumeErrors @@ -911,7 +867,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim @@ -937,7 +892,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubePersistentVolumeFillingUp annotations: description: Based on recent sampling, the PersistentVolume claimed by @@ -966,7 +920,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim @@ -992,7 +945,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: Based on recent sampling, the PersistentVolume claimed by @@ 
-1021,7 +973,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-kubernetes-system rules: - alert: SYN_KubeClientErrors @@ -1040,7 +991,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-kubernetes-system-apiserver rules: - alert: SYN_KubeAPIDown @@ -1056,7 +1006,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeAPITerminatedRequests annotations: description: The kubernetes apiserver has terminated {{ $value | humanizePercentage @@ -1071,7 +1020,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeAggregatedAPIDown annotations: description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace @@ -1085,7 +1033,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeAggregatedAPIErrors annotations: description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace @@ -1099,7 +1046,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-kubernetes-system-kubelet rules: - alert: SYN_KubeNodeNotReady @@ -1115,7 +1061,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeNodeReadinessFlapping annotations: description: The readiness status of node {{ $labels.node }} has changed @@ -1129,7 +1074,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeNodeUnreachable annotations: description: '{{ $labels.node }} is unreachable and some workloads may @@ -1143,7 +1087,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeletClientCertificateRenewalErrors 
annotations: description: Kubelet on node {{ $labels.node }} has failed to renew its @@ -1157,7 +1100,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeletDown annotations: description: Kubelet has disappeared from Prometheus target discovery. @@ -1172,7 +1114,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeletPlegDurationHigh annotations: description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile @@ -1186,7 +1127,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeletPodStartUpLatencyHigh annotations: description: Kubelet Pod startup 99th percentile latency is {{ $value @@ -1200,7 +1140,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeletServerCertificateRenewalErrors annotations: description: Kubelet on node {{ $labels.node }} has failed to renew its @@ -1214,7 +1153,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-machine-api-operator-metrics-collector-up rules: - alert: SYN_MachineAPIOperatorMetricsCollectionFailing @@ -1230,7 +1168,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-machine-health-check-unterminated-short-circuit rules: - alert: SYN_MachineHealthCheckUnterminatedShortCircuit @@ -1248,7 +1185,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-machine-not-yet-deleted rules: - alert: SYN_MachineNotYetDeleted @@ -1267,7 +1203,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-machine-with-no-running-phase rules: - alert: SYN_MachineWithNoRunningPhase @@ -1285,7 +1220,6 @@ spec: 
severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-machine-without-valid-node-ref rules: - alert: SYN_MachineWithoutValidNode @@ -1302,7 +1236,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-mcc-drain-error rules: - alert: SYN_MCCDrainError @@ -1319,7 +1252,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-mcd-kubelet-health-state-error rules: - alert: SYN_KubeletHealthState @@ -1333,7 +1265,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-mcd-pivot-error rules: - alert: SYN_MCDPivotError @@ -1350,7 +1281,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-mcd-reboot-error rules: - alert: SYN_MCDRebootError @@ -1367,7 +1297,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-node-exporter rules: - alert: SYN_NodeClockNotSynchronising @@ -1386,7 +1315,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeClockSkewDetected annotations: description: Clock at {{ $labels.instance }} is out of sync by more than @@ -1410,7 +1338,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeDiskIOSaturation annotations: description: | @@ -1425,7 +1352,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeFileDescriptorLimit annotations: description: File descriptors limit at {{ $labels.instance }} is currently @@ -1442,7 +1368,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemAlmostOutOfFiles annotations: 
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1462,7 +1387,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemAlmostOutOfSpace annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1482,7 +1406,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemFilesFillingUp annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1505,7 +1428,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemSpaceFillingUp annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1528,7 +1450,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemSpaceFillingUp annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1551,7 +1472,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeHighNumberConntrackEntriesUsed annotations: description: '{{ $value | humanizePercentage }} of conntrack entries are @@ -1564,7 +1484,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeMemoryHighUtilization annotations: description: | @@ -1578,7 +1497,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeMemoryMajorPagesFaults annotations: description: | @@ -1593,7 +1511,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeNetworkReceiveErrs annotations: description: '{{ $labels.instance }} interface {{ $labels.device }} 
has @@ -1608,7 +1525,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeNetworkTransmitErrs annotations: description: '{{ $labels.instance }} interface {{ $labels.device }} has @@ -1623,7 +1539,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_NodeSystemdServiceFailed annotations: description: Systemd service {{ $labels.name }} has entered failed state @@ -1637,7 +1552,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-node-network rules: - alert: SYN_NodeNetworkInterfaceFlapping @@ -1654,7 +1568,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-node-utilization rules: - alert: SYN_node_memory_free_percent @@ -1669,7 +1582,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-openshift-etcd.rules rules: - alert: SYN_etcdGRPCRequestsSlow @@ -1688,7 +1600,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_etcdHighNumberOfFailedGRPCRequests annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests @@ -1707,7 +1618,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_etcdHighNumberOfLeaderChanges annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }} average leader @@ -1723,7 +1633,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_etcdInsufficientMembers annotations: description: etcd is reporting fewer instances are available than are @@ -1745,7 +1654,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-openshift-general.rules 
rules: - alert: SYN_TargetDown @@ -1772,7 +1680,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-openshift-ingress-to-route-controller.rules rules: - alert: SYN_UnmanagedRoutes @@ -1789,7 +1696,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-openshift-ingress.rules rules: - alert: SYN_HAProxyDown @@ -1805,7 +1711,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_HAProxyReloadFail annotations: description: This alert fires when HAProxy fails to reload its configuration, @@ -1821,7 +1726,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_IngressControllerDegraded annotations: description: This alert fires when the IngressController status is degraded. @@ -1836,7 +1740,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_IngressControllerUnavailable annotations: description: This alert fires when the IngressController is not available. 
@@ -1851,7 +1754,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-openshift-kubernetes.rules rules: - alert: SYN_ClusterMonitoringOperatorReconciliationErrors @@ -1868,7 +1770,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubeDeploymentReplicasMismatch annotations: description: Deployment {{ $labels.namespace }}/{{ $labels.deployment @@ -1898,7 +1799,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_KubePodNotScheduled annotations: description: |- @@ -1914,7 +1814,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-prometheus rules: - alert: SYN_PrometheusBadConfig @@ -1932,7 +1831,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusDuplicateTimestamps annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping @@ -1947,7 +1845,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusErrorSendingAlertsToSomeAlertmanagers annotations: description: '{{ printf "%.1f" $value }}% errors while sending alerts @@ -1969,7 +1866,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusHighQueryLoad annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} query API @@ -1985,7 +1881,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusLabelLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped @@ -2001,7 +1896,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: 
SYN_PrometheusMissingRuleEvaluations annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed @@ -2016,7 +1910,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusNotConnectedToAlertmanagers annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected @@ -2032,7 +1925,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusNotIngestingSamples annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting @@ -2054,7 +1946,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusNotificationQueueRunningFull annotations: description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} @@ -2075,7 +1966,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusOutOfOrderTimestamps annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping @@ -2090,7 +1980,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusRemoteStorageFailures annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to @@ -2115,7 +2004,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusRemoteWriteDesiredShards annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write @@ -2140,7 +2028,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusRuleFailures annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed @@ -2155,7 +2042,6 @@ spec: severity: warning syn: 'true' 
syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusSDRefreshFailure annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed @@ -2169,7 +2055,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusScrapeBodySizeLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed @@ -2184,7 +2069,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusScrapeSampleLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed @@ -2200,7 +2084,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusTSDBCompactionsFailing annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected @@ -2214,7 +2097,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusTSDBReloadsFailing annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected @@ -2228,7 +2110,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusTargetLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped @@ -2244,7 +2125,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusTargetSyncFailure annotations: description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} @@ -2259,7 +2139,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-prometheus-operator rules: - alert: SYN_PrometheusOperatorNotReady @@ -2275,7 +2154,6 @@ spec: severity: warning syn: 'true' 
syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorReconcileErrors annotations: description: '{{ $value | humanizePercentage }} of reconciling operations @@ -2290,7 +2168,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorRejectedResources annotations: description: Prometheus operator in {{ $labels.namespace }} namespace @@ -2305,7 +2182,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace @@ -2319,7 +2195,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorWatchErrors annotations: description: Errors while performing watch operations in controller {{$labels.controller}} @@ -2333,7 +2208,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-system-memory-exceeds-reservation rules: - alert: SYN_SystemMemoryExceedsReservation @@ -2355,7 +2229,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - name: syn-thanos-query rules: - alert: SYN_ThanosQueryHttpRequestQueryErrorRateHigh @@ -2375,7 +2248,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_ThanosQueryHttpRequestQueryRangeErrorRateHigh annotations: description: Thanos Query {{$labels.job}} in {{$labels.namespace}} is @@ -2393,7 +2265,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_ThanosQueryOverload annotations: description: Thanos Query {{$labels.job}} in {{$labels.namespace}} has @@ -2414,7 +2285,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - 
syn_team: clumsy-donkeys - name: syn-thanos-rule rules: - alert: SYN_ThanosNoRuleEvaluations @@ -2432,7 +2302,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_ThanosRuleGrpcErrorRate annotations: description: Thanos Rule {{$labels.job}} in {{$labels.namespace}} is failing @@ -2451,7 +2320,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_ThanosRuleHighRuleEvaluationFailures annotations: description: Thanos Rule {{$labels.instance}} in {{$labels.namespace}} @@ -2470,7 +2338,6 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_ThanosRuleQueueIsDroppingAlerts annotations: description: Thanos Rule {{$labels.instance}} in {{$labels.namespace}} @@ -2485,7 +2352,6 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys - alert: SYN_ThanosRuleSenderIsFailingAlerts annotations: description: Thanos Rule {{$labels.instance}} in {{$labels.namespace}} @@ -2499,4 +2365,3 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: clumsy-donkeys diff --git a/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml b/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml index 1d884f15..5035dc3d 100644 --- a/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ b/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -24,8 +24,16 @@ stringData: "severity": "warning" "target_match_re": "severity": "info" + "receivers": + - "name": "__component_openshift4_monitoring_null" "route": "group_interval": "5s" "group_wait": "0s" "repeat_interval": "10m" + "routes": + - "continue": false + "matchers": + - "namespace =~ \"\"" + "receiver": 
"__component_openshift4_monitoring_null" + - "receiver": "__component_openshift4_monitoring_null" type: Opaque diff --git a/tests/golden/release-4.11/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml b/tests/golden/release-4.11/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml index 1d884f15..5035dc3d 100644 --- a/tests/golden/release-4.11/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ b/tests/golden/release-4.11/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -24,8 +24,16 @@ stringData: "severity": "warning" "target_match_re": "severity": "info" + "receivers": + - "name": "__component_openshift4_monitoring_null" "route": "group_interval": "5s" "group_wait": "0s" "repeat_interval": "10m" + "routes": + - "continue": false + "matchers": + - "namespace =~ \"\"" + "receiver": "__component_openshift4_monitoring_null" + - "receiver": "__component_openshift4_monitoring_null" type: Opaque diff --git a/tests/golden/release-4.12/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml b/tests/golden/release-4.12/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml index 1d884f15..5035dc3d 100644 --- a/tests/golden/release-4.12/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ b/tests/golden/release-4.12/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -24,8 +24,16 @@ stringData: "severity": "warning" "target_match_re": "severity": "info" + "receivers": + - "name": "__component_openshift4_monitoring_null" "route": "group_interval": "5s" "group_wait": "0s" "repeat_interval": "10m" + "routes": + - "continue": false + "matchers": + - "namespace =~ \"\"" + "receiver": "__component_openshift4_monitoring_null" + - "receiver": "__component_openshift4_monitoring_null" type: Opaque diff --git a/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml 
b/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml index 1d884f15..5035dc3d 100644 --- a/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ b/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -24,8 +24,16 @@ stringData: "severity": "warning" "target_match_re": "severity": "info" + "receivers": + - "name": "__component_openshift4_monitoring_null" "route": "group_interval": "5s" "group_wait": "0s" "repeat_interval": "10m" + "routes": + - "continue": false + "matchers": + - "namespace =~ \"\"" + "receiver": "__component_openshift4_monitoring_null" + - "receiver": "__component_openshift4_monitoring_null" type: Opaque diff --git a/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml b/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml index 1d884f15..5035dc3d 100644 --- a/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ b/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -24,8 +24,16 @@ stringData: "severity": "warning" "target_match_re": "severity": "info" + "receivers": + - "name": "__component_openshift4_monitoring_null" "route": "group_interval": "5s" "group_wait": "0s" "repeat_interval": "10m" + "routes": + - "continue": false + "matchers": + - "namespace =~ \"\"" + "receiver": "__component_openshift4_monitoring_null" + - "receiver": "__component_openshift4_monitoring_null" type: Opaque diff --git a/tests/golden/team-label/openshift4-monitoring/apps/openshift4-monitoring.yaml b/tests/golden/team-label/openshift4-monitoring/apps/openshift4-monitoring.yaml deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml 
b/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml deleted file mode 100644 index 1d884f15..00000000 --- a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ /dev/null @@ -1,31 +0,0 @@ -apiVersion: v1 -data: {} -kind: Secret -metadata: - annotations: {} - labels: - name: alertmanager-main - name: alertmanager-main - namespace: openshift-monitoring -stringData: - alertmanager.yaml: |- - "inhibit_rules": - - "equal": - - "namespace" - - "alertname" - "source_match": - "severity": "critical" - "target_match_re": - "severity": "warning|info" - - "equal": - - "namespace" - - "alertname" - "source_match": - "severity": "warning" - "target_match_re": - "severity": "info" - "route": - "group_interval": "5s" - "group_wait": "0s" - "repeat_interval": "10m" -type: Opaque diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/00_namespace_labels.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/00_namespace_labels.yaml similarity index 100% rename from tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/00_namespace_labels.yaml rename to tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/00_namespace_labels.yaml diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/01_secrets.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/01_secrets.yaml similarity index 100% rename from tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/01_secrets.yaml rename to tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/01_secrets.yaml diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/02_aggregated_clusterroles.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/02_aggregated_clusterroles.yaml similarity index 100% rename from 
tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/02_aggregated_clusterroles.yaml rename to tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/02_aggregated_clusterroles.yaml diff --git a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml new file mode 100644 index 00000000..4c71f782 --- /dev/null +++ b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -0,0 +1,62 @@ +apiVersion: v1 +data: {} +kind: Secret +metadata: + annotations: {} + labels: + name: alertmanager-main + name: alertmanager-main + namespace: openshift-monitoring +stringData: + alertmanager.yaml: |- + "inhibit_rules": + - "equal": + - "namespace" + - "alertname" + "source_match": + "severity": "critical" + "target_match_re": + "severity": "warning|info" + - "equal": + - "namespace" + - "alertname" + "source_match": + "severity": "warning" + "target_match_re": + "severity": "info" + "receivers": + - "name": "__component_openshift4_monitoring_null" + "route": + "group_interval": "5s" + "group_wait": "0s" + "repeat_interval": "10m" + "routes": + - "matchers": + - "alertname = Watchdog" + "receiver": "heartbeat" + "repeat_interval": "60s" + - "continue": true + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"base|overridden|shared-responsibility\"" + "receiver": "team_default_chubby-cockroaches" + - "continue": true + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"ns-string|same-ns\"" + "receiver": "team_default_clumsy-donkeys" + - "continue": true + "matchers": + - "syn_team = \"\"" + - "namespace =~ \"ns-object|shared-responsibility|same-ns\"" + "receiver": "team_default_lovable-lizards" + - "continue": false + "matchers": + - "syn_team = \"\"" + - "namespace =~ 
\"base|overridden|shared-responsibility|ns-string|same-ns|ns-object|shared-responsibility|same-ns\"" + "receiver": "__component_openshift4_monitoring_null" + - "matchers": + - "other = \"true\"" + "receiver": "other" + - "receiver": "team_default_clumsy-donkeys" +type: Opaque diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/10_configmap.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_configmap.yaml similarity index 100% rename from tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/10_configmap.yaml rename to tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_configmap.yaml diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/10_configmap_user_workload.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_configmap_user_workload.yaml similarity index 100% rename from tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/10_configmap_user_workload.yaml rename to tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_configmap_user_workload.yaml diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/20_networkpolicy.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/20_networkpolicy.yaml similarity index 100% rename from tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/20_networkpolicy.yaml rename to tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/20_networkpolicy.yaml diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/20_user_workload_networkpolicy.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/20_user_workload_networkpolicy.yaml similarity index 100% rename from tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/20_user_workload_networkpolicy.yaml rename to 
tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/20_user_workload_networkpolicy.yaml diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml similarity index 100% rename from tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml rename to tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml similarity index 97% rename from tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml rename to tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml index 7c74a8ce..b4c8d9bf 100644 --- a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml +++ b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/capacity_rules.yaml @@ -28,7 +28,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: my_team + syn_team: clumsy-donkeys - name: syn-MemoryCapacity rules: - alert: SYN_ClusterLowOnMemory @@ -48,7 +48,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: my_team + syn_team: clumsy-donkeys - name: syn-PodCapacity rules: - alert: SYN_TooManyPods @@ -68,7 +68,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: my_team + syn_team: clumsy-donkeys - name: syn-ResourceRequests rules: - alert: SYN_TooMuchCPURequested @@ -88,7 +88,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: my_team + syn_team: clumsy-donkeys - alert: SYN_TooMuchMemoryRequested annotations: description: 
The cluster is close to assigning all memory to running pods. @@ -106,7 +106,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: my_team + syn_team: clumsy-donkeys - name: syn-UnusedCapacity rules: - alert: SYN_ClusterHasUnusedNodes @@ -144,4 +144,4 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: my_team + syn_team: clumsy-donkeys diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/custom_rules.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/custom_rules.yaml similarity index 93% rename from tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/custom_rules.yaml rename to tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/custom_rules.yaml index 942e5f10..0db382a0 100644 --- a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/custom_rules.yaml +++ b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/custom_rules.yaml @@ -22,4 +22,4 @@ spec: labels: syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml similarity index 96% rename from tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml rename to tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 54d3e747..7eaf543d 100644 --- a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -33,7 +33,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: 
SYN_AlertmanagerClusterFailedToSendAlerts annotations: description: The minimum notification failure rate to {{ $labels.integration @@ -55,7 +55,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_AlertmanagerConfigInconsistent annotations: description: Alertmanager instances within the {{$labels.job}} cluster @@ -73,7 +73,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_AlertmanagerFailedReload annotations: description: Configuration has failed to load for {{ $labels.namespace @@ -90,7 +90,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_AlertmanagerFailedToSendAlerts annotations: description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed @@ -111,7 +111,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_AlertmanagerMembersInconsistent annotations: description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} has @@ -130,7 +130,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-cluster-machine-approver.rules rules: - alert: SYN_MachineApproverMaxPendingCSRsReached @@ -150,7 +150,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-cluster-network-operator-sdn.rules rules: - alert: SYN_NodeProxyApplySlow @@ -167,7 +167,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeProxyApplyStale annotations: description: Stale proxy rules for Kubernetes services may increase the @@ -184,7 +184,7 @@ spec: severity: warning syn: 'true' 
syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeWithoutSDNController annotations: description: | @@ -200,7 +200,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeWithoutSDNPod annotations: description: Network control plane configuration on the node could be @@ -215,7 +215,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_SDNPodNotReady annotations: description: Network control plane configuration on the node could be @@ -230,7 +230,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-cluster-operators rules: - alert: SYN_ClusterOperatorDegraded @@ -258,7 +258,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_ClusterOperatorDown annotations: description: The {{ $labels.name }} operator may be down or disabled because @@ -277,7 +277,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_ClusterOperatorFlapping annotations: description: The {{ $labels.name }} operator behavior might cause upgrades @@ -294,7 +294,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-cluster-version rules: - alert: SYN_ClusterReleaseNotAccepted @@ -316,7 +316,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_ClusterVersionOperatorDown annotations: description: The operator may be down or disabled. 
The cluster will not @@ -337,7 +337,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeControllerManagerDown annotations: description: KubeControllerManager has disappeared from Prometheus target @@ -353,7 +353,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeSchedulerDown annotations: description: KubeScheduler has disappeared from Prometheus target discovery. @@ -367,7 +367,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PodDisruptionBudgetLimit annotations: description: The pod disruption budget is below the minimum disruptions @@ -383,7 +383,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-control-plane-cpu-utilization rules: - alert: SYN_ExtremelyHighIndividualControlPlaneCPU @@ -409,7 +409,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_HighOverallControlPlaneCPU annotations: description: Given three control plane nodes, the overall CPU utilization @@ -438,7 +438,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-etcd rules: - alert: SYN_etcdDatabaseQuotaLowSpace @@ -457,7 +457,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_etcdHighFsyncDurations annotations: description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync @@ -473,7 +473,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_etcdHighNumberOfFailedProposals 
annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal @@ -488,7 +488,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_etcdMembersDown annotations: description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value @@ -510,7 +510,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_etcdNoLeader annotations: description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance @@ -525,7 +525,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-extremely-high-individual-control-plane-memory rules: - alert: SYN_ExtremelyHighIndividualControlPlaneMemory @@ -563,7 +563,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-general.rules rules: - alert: Watchdog @@ -583,7 +583,7 @@ spec: severity: none syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-high-overall-control-plane-memory rules: - alert: SYN_HighOverallControlPlaneMemory @@ -623,7 +623,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-kube-state-metrics rules: - alert: SYN_KubeStateMetricsWatchErrors @@ -643,7 +643,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-kubernetes-apps rules: - alert: SYN_KubeContainerWaiting @@ -660,7 +660,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeDaemonSetMisScheduled annotations: description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ 
@@ -674,7 +674,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeDaemonSetNotScheduled annotations: description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ @@ -690,7 +690,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeDaemonSetRolloutStuck annotations: description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} @@ -726,7 +726,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeDeploymentGenerationMismatch annotations: description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment @@ -743,7 +743,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeDeploymentRolloutStuck annotations: description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment @@ -758,7 +758,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeJobFailed annotations: description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed @@ -774,7 +774,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeJobNotCompleted annotations: description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking @@ -789,7 +789,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubePodCrashLooping annotations: description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container @@ -803,7 +803,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: 
other_team + syn_team: clumsy-donkeys - alert: SYN_KubePodNotReady annotations: description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in @@ -825,7 +825,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeStatefulSetGenerationMismatch annotations: description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset @@ -842,7 +842,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeStatefulSetReplicasMismatch annotations: description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset @@ -865,7 +865,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeStatefulSetUpdateNotRolledOut annotations: description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset @@ -895,7 +895,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-kubernetes-storage rules: - alert: SYN_KubePersistentVolumeErrors @@ -911,7 +911,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim @@ -937,7 +937,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubePersistentVolumeFillingUp annotations: description: Based on recent sampling, the PersistentVolume claimed by @@ -966,7 +966,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume 
claimed by {{ $labels.persistentvolumeclaim @@ -992,7 +992,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: Based on recent sampling, the PersistentVolume claimed by @@ -1021,7 +1021,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-kubernetes-system rules: - alert: SYN_KubeClientErrors @@ -1040,7 +1040,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-kubernetes-system-apiserver rules: - alert: SYN_KubeAPIDown @@ -1056,7 +1056,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeAPITerminatedRequests annotations: description: The kubernetes apiserver has terminated {{ $value | humanizePercentage @@ -1071,7 +1071,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeAggregatedAPIDown annotations: description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace @@ -1085,7 +1085,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeAggregatedAPIErrors annotations: description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace @@ -1099,7 +1099,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-kubernetes-system-kubelet rules: - alert: SYN_KubeNodeNotReady @@ -1115,7 +1115,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeNodeReadinessFlapping annotations: 
description: The readiness status of node {{ $labels.node }} has changed @@ -1129,7 +1129,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeNodeUnreachable annotations: description: '{{ $labels.node }} is unreachable and some workloads may @@ -1143,7 +1143,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeletClientCertificateRenewalErrors annotations: description: Kubelet on node {{ $labels.node }} has failed to renew its @@ -1157,7 +1157,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeletDown annotations: description: Kubelet has disappeared from Prometheus target discovery. @@ -1172,7 +1172,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeletPlegDurationHigh annotations: description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile @@ -1186,7 +1186,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeletPodStartUpLatencyHigh annotations: description: Kubelet Pod startup 99th percentile latency is {{ $value @@ -1200,7 +1200,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeletServerCertificateRenewalErrors annotations: description: Kubelet on node {{ $labels.node }} has failed to renew its @@ -1214,7 +1214,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-machine-api-operator-metrics-collector-up rules: - alert: SYN_MachineAPIOperatorMetricsCollectionFailing @@ -1230,7 +1230,7 @@ spec: severity: 
critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-machine-health-check-unterminated-short-circuit rules: - alert: SYN_MachineHealthCheckUnterminatedShortCircuit @@ -1248,7 +1248,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-machine-not-yet-deleted rules: - alert: SYN_MachineNotYetDeleted @@ -1267,7 +1267,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-machine-with-no-running-phase rules: - alert: SYN_MachineWithNoRunningPhase @@ -1285,7 +1285,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-machine-without-valid-node-ref rules: - alert: SYN_MachineWithoutValidNode @@ -1302,7 +1302,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-mcc-drain-error rules: - alert: SYN_MCCDrainError @@ -1319,7 +1319,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-mcd-kubelet-health-state-error rules: - alert: SYN_KubeletHealthState @@ -1333,7 +1333,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-mcd-pivot-error rules: - alert: SYN_MCDPivotError @@ -1350,7 +1350,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-mcd-reboot-error rules: - alert: SYN_MCDRebootError @@ -1367,7 +1367,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-node-exporter rules: - alert: SYN_NodeClockNotSynchronising @@ -1386,7 
+1386,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeClockSkewDetected annotations: description: Clock at {{ $labels.instance }} is out of sync by more than @@ -1410,7 +1410,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeDiskIOSaturation annotations: description: | @@ -1425,7 +1425,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeFileDescriptorLimit annotations: description: File descriptors limit at {{ $labels.instance }} is currently @@ -1442,7 +1442,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemAlmostOutOfFiles annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1462,7 +1462,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemAlmostOutOfSpace annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1482,7 +1482,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemFilesFillingUp annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1505,7 +1505,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeFilesystemSpaceFillingUp annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1528,7 +1528,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: 
clumsy-donkeys - alert: SYN_NodeFilesystemSpaceFillingUp annotations: description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint @@ -1551,7 +1551,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeHighNumberConntrackEntriesUsed annotations: description: '{{ $value | humanizePercentage }} of conntrack entries are @@ -1564,7 +1564,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeMemoryHighUtilization annotations: description: | @@ -1578,7 +1578,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeMemoryMajorPagesFaults annotations: description: | @@ -1593,7 +1593,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeNetworkReceiveErrs annotations: description: '{{ $labels.instance }} interface {{ $labels.device }} has @@ -1608,7 +1608,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeNetworkTransmitErrs annotations: description: '{{ $labels.instance }} interface {{ $labels.device }} has @@ -1623,7 +1623,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_NodeSystemdServiceFailed annotations: description: Systemd service {{ $labels.name }} has entered failed state @@ -1637,7 +1637,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-node-network rules: - alert: SYN_NodeNetworkInterfaceFlapping @@ -1654,7 +1654,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + 
syn_team: clumsy-donkeys - name: syn-node-utilization rules: - alert: SYN_node_memory_free_percent @@ -1669,7 +1669,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-openshift-etcd.rules rules: - alert: SYN_etcdGRPCRequestsSlow @@ -1688,7 +1688,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_etcdHighNumberOfFailedGRPCRequests annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests @@ -1707,7 +1707,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_etcdHighNumberOfLeaderChanges annotations: description: 'etcd cluster "{{ $labels.job }}": {{ $value }} average leader @@ -1723,7 +1723,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_etcdInsufficientMembers annotations: description: etcd is reporting fewer instances are available than are @@ -1745,7 +1745,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-openshift-general.rules rules: - alert: SYN_TargetDown @@ -1772,7 +1772,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-openshift-ingress-to-route-controller.rules rules: - alert: SYN_UnmanagedRoutes @@ -1789,7 +1789,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-openshift-ingress.rules rules: - alert: SYN_HAProxyDown @@ -1805,7 +1805,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_HAProxyReloadFail annotations: 
description: This alert fires when HAProxy fails to reload its configuration, @@ -1821,7 +1821,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_IngressControllerDegraded annotations: description: This alert fires when the IngressController status is degraded. @@ -1836,7 +1836,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_IngressControllerUnavailable annotations: description: This alert fires when the IngressController is not available. @@ -1851,7 +1851,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-openshift-kubernetes.rules rules: - alert: SYN_ClusterMonitoringOperatorReconciliationErrors @@ -1868,7 +1868,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubeDeploymentReplicasMismatch annotations: description: Deployment {{ $labels.namespace }}/{{ $labels.deployment @@ -1898,7 +1898,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_KubePodNotScheduled annotations: description: |- @@ -1914,7 +1914,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-prometheus rules: - alert: SYN_PrometheusBadConfig @@ -1932,7 +1932,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusDuplicateTimestamps annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping @@ -1947,7 +1947,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: 
SYN_PrometheusErrorSendingAlertsToSomeAlertmanagers annotations: description: '{{ printf "%.1f" $value }}% errors while sending alerts @@ -1969,7 +1969,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusHighQueryLoad annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} query API @@ -1985,7 +1985,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusLabelLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped @@ -2001,7 +2001,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusMissingRuleEvaluations annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has missed @@ -2016,7 +2016,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusNotConnectedToAlertmanagers annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not connected @@ -2032,7 +2032,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusNotIngestingSamples annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is not ingesting @@ -2054,7 +2054,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusNotificationQueueRunningFull annotations: description: Alert notification queue of Prometheus {{$labels.namespace}}/{{$labels.pod}} @@ -2075,7 +2075,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: 
SYN_PrometheusOutOfOrderTimestamps annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} is dropping @@ -2090,7 +2090,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusRemoteStorageFailures annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} failed to @@ -2115,7 +2115,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusRemoteWriteDesiredShards annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} remote write @@ -2140,7 +2140,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusRuleFailures annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed @@ -2155,7 +2155,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusSDRefreshFailure annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed @@ -2169,7 +2169,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusScrapeBodySizeLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed @@ -2184,7 +2184,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusScrapeSampleLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has failed @@ -2200,7 +2200,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusTSDBCompactionsFailing annotations: description: Prometheus 
{{$labels.namespace}}/{{$labels.pod}} has detected @@ -2214,7 +2214,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusTSDBReloadsFailing annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has detected @@ -2228,7 +2228,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusTargetLimitHit annotations: description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped @@ -2244,7 +2244,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusTargetSyncFailure annotations: description: '{{ printf "%.0f" $value }} targets in Prometheus {{$labels.namespace}}/{{$labels.pod}} @@ -2259,7 +2259,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-prometheus-operator rules: - alert: SYN_PrometheusOperatorNotReady @@ -2275,7 +2275,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorReconcileErrors annotations: description: '{{ $value | humanizePercentage }} of reconciling operations @@ -2290,7 +2290,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorRejectedResources annotations: description: Prometheus operator in {{ $labels.namespace }} namespace @@ -2305,7 +2305,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace @@ -2319,7 +2319,7 @@ spec: severity: warning 
syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorWatchErrors annotations: description: Errors while performing watch operations in controller {{$labels.controller}} @@ -2333,7 +2333,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-system-memory-exceeds-reservation rules: - alert: SYN_SystemMemoryExceedsReservation @@ -2355,7 +2355,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-thanos-query rules: - alert: SYN_ThanosQueryHttpRequestQueryErrorRateHigh @@ -2375,7 +2375,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_ThanosQueryHttpRequestQueryRangeErrorRateHigh annotations: description: Thanos Query {{$labels.job}} in {{$labels.namespace}} is @@ -2393,7 +2393,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_ThanosQueryOverload annotations: description: Thanos Query {{$labels.job}} in {{$labels.namespace}} has @@ -2414,7 +2414,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - name: syn-thanos-rule rules: - alert: SYN_ThanosNoRuleEvaluations @@ -2432,7 +2432,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_ThanosRuleGrpcErrorRate annotations: description: Thanos Rule {{$labels.job}} in {{$labels.namespace}} is failing @@ -2451,7 +2451,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_ThanosRuleHighRuleEvaluationFailures annotations: description: Thanos Rule 
{{$labels.instance}} in {{$labels.namespace}} @@ -2470,7 +2470,7 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_ThanosRuleQueueIsDroppingAlerts annotations: description: Thanos Rule {{$labels.instance}} in {{$labels.namespace}} @@ -2485,7 +2485,7 @@ spec: severity: critical syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys - alert: SYN_ThanosRuleSenderIsFailingAlerts annotations: description: Thanos Rule {{$labels.instance}} in {{$labels.namespace}} @@ -2499,4 +2499,4 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring - syn_team: other_team + syn_team: clumsy-donkeys diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/rbac.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/rbac.yaml similarity index 100% rename from tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/rbac.yaml rename to tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/rbac.yaml diff --git a/tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/silence.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/silence.yaml similarity index 100% rename from tests/golden/team-label/openshift4-monitoring/openshift4-monitoring/silence.yaml rename to tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/silence.yaml diff --git a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml index 1d884f15..5035dc3d 100644 --- a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ 
b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -24,8 +24,16 @@ stringData: "severity": "warning" "target_match_re": "severity": "info" + "receivers": + - "name": "__component_openshift4_monitoring_null" "route": "group_interval": "5s" "group_wait": "0s" "repeat_interval": "10m" + "routes": + - "continue": false + "matchers": + - "namespace =~ \"\"" + "receiver": "__component_openshift4_monitoring_null" + - "receiver": "__component_openshift4_monitoring_null" type: Opaque diff --git a/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml b/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml index 1d884f15..5035dc3d 100644 --- a/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ b/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -24,8 +24,16 @@ stringData: "severity": "warning" "target_match_re": "severity": "info" + "receivers": + - "name": "__component_openshift4_monitoring_null" "route": "group_interval": "5s" "group_wait": "0s" "repeat_interval": "10m" + "routes": + - "continue": false + "matchers": + - "namespace =~ \"\"" + "receiver": "__component_openshift4_monitoring_null" + - "receiver": "__component_openshift4_monitoring_null" type: Opaque diff --git a/tests/team-label.yml b/tests/team-label.yml deleted file mode 100644 index 924be78e..00000000 --- a/tests/team-label.yml +++ /dev/null @@ -1,30 +0,0 @@ -# Overwrite parameters here - -parameters: - kapitan: - dependencies: - - type: https - source: https://raw.githubusercontent.com/projectsyn/component-patch-operator/master/lib/patch-operator.libsonnet - output_path: vendor/lib/patch-operator.libsonnet - - patch_operator: - namespace: syn-patch-operator - patch_serviceaccount: - name: patch-sa - - syn: - owner: my_team - teams: - other_team: - instances: - - 
'openshift4-monitoring' - - openshift4_monitoring: - rules: - my-rules: - "alert:MyAlert": - expr: 'vector(1)' - labels: - syn_team: yet_another_team - "alert:NoLabels": - expr: 'vector(1)' diff --git a/tests/team-routing.yml b/tests/team-routing.yml index a4da5b7b..62806246 100644 --- a/tests/team-routing.yml +++ b/tests/team-routing.yml @@ -1,3 +1,90 @@ -# Overwrite parameters here +applications: + - non-existing + - no-ns + - ns-string + - ns-object + - base as ns-in-base + - base as ns-overridden + - non-existing as still-non-existing + - shared-responsibility + - same-ns-1 + - same-ns-2 -# parameters: {...} +parameters: + kapitan: + dependencies: + - type: https + source: https://raw.githubusercontent.com/projectsyn/component-patch-operator/master/lib/patch-operator.libsonnet + output_path: vendor/lib/patch-operator.libsonnet + + patch_operator: + namespace: syn-patch-operator + patch_serviceaccount: + name: patch-sa + + syn: + owner: clumsy-donkeys + teams: + chubby-cockroaches: + instances: + - ns-in-base + - ns-overridden + - shared-responsibility + lovable-lizards: + instances: + - ns-object + - shared-responsibility + - same-ns-2 + + openshift4_monitoring: + alertManagerConfig: + route: + routes: + - receiver: other + matchers: + - other = "true" + alertManagerAutoDiscovery: + debug_config_map: true + team_receiver_format: team_default_%s + additional_alert_matchers: + - 'syn_team = ""' + prepend_routes: + - matchers: + - alertname = Watchdog + repeat_interval: 60s + receiver: heartbeat + + rules: + my-rules: + "alert:MyAlert": + expr: 'vector(1)' + labels: + syn_team: yet_another_team + "alert:NoLabels": + expr: 'vector(1)' + + no_ns: {} + + ns_string: + namespace: "ns-string" + + ns_object: + namespace: + name: "ns-object" + + base: + namespace: base + + ns_in_base: {} + + ns_overridden: + namespace: overridden + + shared_responsibility: + namespace: shared-responsibility + + same_ns_1: + namespace: same-ns + + same_ns_2: + namespace: same-ns From 
0946800ba1e3a9024b6012a5f1f35e73ef8ccca2 Mon Sep 17 00:00:00 2001 From: Sebastian Widmer Date: Tue, 3 Oct 2023 10:06:00 +0200 Subject: [PATCH 4/5] Improve wording in documentation Co-authored-by: Simon Gerber --- docs/modules/ROOT/pages/references/parameters.adoc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/modules/ROOT/pages/references/parameters.adoc b/docs/modules/ROOT/pages/references/parameters.adoc index 0497c334..b3b1aed8 100644 --- a/docs/modules/ROOT/pages/references/parameters.adoc +++ b/docs/modules/ROOT/pages/references/parameters.adoc @@ -289,7 +289,9 @@ alertManagerAutoDiscovery: `alertManagerAutoDiscovery` holds the configuration for the Alertmanager auto-discovery feature. The auto-discovery routes alerts to the configured teams based on their namespaces and the top-level `syn.teams[*].instances` and `syn.owner` parameters. -It tries to find the namespaces associated with a Commodore application (called instance in the `syn` configuration) in the applications array, and matches the namespace to the team based on the `syn.teams[*].instances` parameter. +Auto-discovery first creates a list of Commodore component instances by parsing the `applications` array using the same rules as Commodore itself (see also the https://syn.tools/commodore/reference/architecture.html#_component_instantiation[Commodore component instantiation documentation]). +For each discovered instance, the component then renders the instance parameters, and reads the component's namespace from field `namespace` or `namespace.name` in the rendered parameters. +Finally, routing rules are generated to route alerts from the discovered namespaces to the associated component instance's owning team. 
.`syn` Team Example [source,yaml] From 51c9fb318755da1ac8e4ace5a3a8c8c2b5bc1d90 Mon Sep 17 00:00:00 2001 From: Sebastian Widmer Date: Tue, 3 Oct 2023 14:05:14 +0200 Subject: [PATCH 5/5] Factor out team discovery to the prom library --- component/alert-routing-discovery.libsonnet | 59 +------------- ...shift4-monitoring-alert-patching.libsonnet | 35 +-------- lib/openshift4-monitoring-prom.libsonnet | 77 +++++++++++++++++++ .../10_alertmanager_config.yaml | 6 +- .../99_discovery_debug_cm.yaml | 45 ++++------- tests/team-routing.yml | 6 -- 6 files changed, 103 insertions(+), 125 deletions(-) diff --git a/component/alert-routing-discovery.libsonnet b/component/alert-routing-discovery.libsonnet index b7752af9..a45a8479 100644 --- a/component/alert-routing-discovery.libsonnet +++ b/component/alert-routing-discovery.libsonnet @@ -10,19 +10,6 @@ local adParams = params.openshift4_monitoring.alertManagerAutoDiscovery; local nullReceiver = '__component_openshift4_monitoring_null'; -// appKeys returns the (aliased) application name and if aliased the original name in the second position. -// The application name is translated from kebab-case to snake_case, except if the second parameter is set to true. -local appKeys = function(name, raw=false) - local normalized = function(name) if raw then name else std.strReplace(name, '-', '_'); - // can be simplified with jsonnet > 0.19 which would support ' as ' as the substring - local parts = std.split(name, ' '); - if std.length(parts) == 1 then - [ normalized(parts[0]) ] - else if std.length(parts) == 3 && parts[1] == 'as' then - [ normalized(parts[2]), normalized(parts[0]) ] - else - error 'invalid application name `%s`' % name; - // discoverNS returns the namespace for the given application. 
// It looks into the follwing places: // - params..namespace @@ -38,7 +25,7 @@ local discoverNS = function(app) else if std.isObject(p.namespace) && std.objectHas(p.namespace, 'name') && std.isString(p.namespace.name) then p.namespace.name; - local ks = appKeys(app); + local ks = prom.appKeys(app); local aliased = f(ks[0]); if aliased != null then aliased @@ -52,50 +39,10 @@ local ownerOrFallbackTeam = else params.openshift4_monitoring.fallback_team; -// teamsForApplication returns the teams for the given application. -// It does so by looking at the top level syn parameter. -// The syn parameter should look roughly like this. -// -// syn: -// owner: clumsy-donkeys -// teams: -// chubby-cockroaches: -// instances: -// - superb-visualization -// lovable-lizards: -// instances: -// - apartment-cats -// -// The application is first looked up in the instances of the teams, if no team is found, owner is used as fallback. -local teamsForApplication = function(app) - local lookup = function(app) - if std.objectHas(params, 'syn') && std.objectHas(params.syn, 'teams') then - local teams = params.syn.teams; - std.foldl( - function(prev, team) - if std.objectHas(teams, team) && std.objectHas(teams[team], 'instances') && std.member(teams[team].instances, app) then - prev + [ team ] - else - prev, - std.objectFields(teams), - [], - ); - - local teams = std.prune(std.map(lookup, appKeys(app, true))); - - if std.length(teams) > 0 then - teams[0] - else - [ ownerOrFallbackTeam ]; - // teamToNS is a map from a team to namespaces. 
local teamToNS = std.foldl( function(prev, app) - local tms = teamsForApplication(app); - std.foldl( - function(prev, tm) prev { [tm]+: [ discoverNS(app) ] }, tms, prev - ) - , + prev { [prom.teamForApplication(app)]+: [ discoverNS(app) ] }, inv.applications, {} ); @@ -136,7 +83,7 @@ local alertmanagerConfig = debugConfigMap: kube.ConfigMap('discovery-debug') { data: { local discoveredNamespaces = std.foldl(function(prev, app) prev { [app]: discoverNS(app) }, inv.applications, {}), - local discoveredTeams = std.foldl(function(prev, app) prev { [app]: teamsForApplication(app) }, inv.applications, {}), + local discoveredTeams = std.foldl(function(prev, app) prev { [app]: prom.teamForApplication(app) }, inv.applications, {}), applications: std.manifestJsonMinified(inv.applications), discovered_namespaces: std.manifestYamlDoc(discoveredNamespaces), apps_without_namespaces: std.manifestYamlDoc(std.foldl(function(prev, app) if discoveredNamespaces[app] == null then prev + [ app ] else prev, std.objectFields(discoveredNamespaces), [])), diff --git a/lib/openshift4-monitoring-alert-patching.libsonnet b/lib/openshift4-monitoring-alert-patching.libsonnet index 3a4fabe0..637b4eb8 100644 --- a/lib/openshift4-monitoring-alert-patching.libsonnet +++ b/lib/openshift4-monitoring-alert-patching.libsonnet @@ -2,6 +2,8 @@ // arbitrary alert rules to adhere to the format required by the component's // approach for allowing us to patch upstream rules. 
local com = import 'lib/commodore.libjsonnet'; +local prom = import 'lib/prom.libsonnet'; + local inv = com.inventory(); local global_alert_params = @@ -20,35 +22,6 @@ local global_alert_params = customAnnotations: {}, }; -local syn_team = - local instance = inv.parameters._instance; - local syn = if std.objectHas(inv.parameters, 'syn') then { - owner: std.get(inv.parameters.syn, 'owner', ''), - teams: std.get(inv.parameters.syn, 'teams', { teams: {} }), - } else { owner: '', teams: {} }; - local team_instances = [ - { - team: tn, - instances: std.get(syn.teams[tn], 'instances', []), - } - for tn in std.objectFields(syn.teams) - ]; - local team = std.foldl( - function(o, ti) - if std.member(ti.instances, instance) then - o + [ ti.team ] - else - o, - team_instances, - [] - ); - if std.length(team) > 1 then - error "Multiple owners for instance '%s': %s" % [ instance, team ] - else if std.length(team) == 1 then - team[0] - else - syn.owner; - /** * \brief filter alert rules in the provided group * @@ -140,7 +113,7 @@ local patchRule(rule, patches={}, patchName=true) = then rule.labels.syn_team else - syn_team; + prom.teamForApplication(inv.parameters._instance); rule { // Change alert names so we don't get multiple alerts with the same // name, as the logging operator deploys its own copy of these @@ -153,7 +126,7 @@ local patchRule(rule, patches={}, patchName=true) = syn_component: inv.parameters._instance, // mark alert as belonging to the team in whose context the // function is called. 
- [if syn_team_label != '' then 'syn_team']: syn_team_label, + [if syn_team_label != null then 'syn_team']: syn_team_label, }, annotations+: std.get(global_alert_params.customAnnotations, super.alert, {}), diff --git a/lib/openshift4-monitoring-prom.libsonnet b/lib/openshift4-monitoring-prom.libsonnet index b56ca4c3..9e027948 100644 --- a/lib/openshift4-monitoring-prom.libsonnet +++ b/lib/openshift4-monitoring-prom.libsonnet @@ -9,6 +9,8 @@ local kube = import 'lib/kube.libjsonnet'; local alertpatching = import 'lib/alert-patching.libsonnet'; +local inv = com.inventory(); + // Define Prometheus Operator API versions local api_version = { monitoring: 'monitoring.coreos.com/v1', @@ -55,6 +57,81 @@ local prometheusRule(name) = Alertmanager(name): kube._Object(api_version.monitoring, 'Alertmanager', name), + /** + * \brief Returns an array with the (aliased) application name and if aliased the original name in the second position. + * + * The application name is translated from kebab-case to snake_case, except if the second parameter is set to true. + * + * \arg name + * The application name. Can be `name` or `name as alias`. + * \arg raw + * If set to true, the application name is not translated from kebab-case to snake_case. + * \return + * An array with the (aliased) application name and if aliased the original name in the second position. + */ + appKeys: function(name, raw=false) + local normalized = function(name) if raw then name else std.strReplace(name, '-', '_'); + // can be simplified with jsonnet > 0.19 which would support ' as ' as the substring + local parts = std.split(name, ' '); + if std.length(parts) == 1 then + [ normalized(parts[0]) ] + else if std.length(parts) == 3 && parts[1] == 'as' then + [ normalized(parts[2]), normalized(parts[0]) ] + else + error 'invalid application name `%s`' % name, + + /** + * \brief Returns the team for the given application or null. + * + * It does so by looking at the top level syn parameter. 
+ * The syn parameter should look roughly like this. + * + * syn: + * owner: clumsy-donkeys + * teams: + * chubby-cockroaches: + * instances: + * - superb-visualization + * lovable-lizards: + * instances: + * - apartment-cats + * + * The application is first looked up in the instances of the teams, if no team is found, owner is used as fallback. + * An error is thrown if the application is found belonging to multiple teams. + * + * \arg app + * The application name. Can be the merged `inventory().params._instance` or an (aliased) application name. + * \return + * The team name or `null` if no team is found. + */ + teamForApplication: function(app) + local params = inv.parameters; + local lookup = function(app) + if std.objectHas(params, 'syn') && std.objectHas(params.syn, 'teams') then + local teams = params.syn.teams; + local teamsForApp = std.foldl( + function(prev, team) + if std.objectHas(teams, team) && std.objectHas(teams[team], 'instances') && std.member(teams[team].instances, app) then + prev + [ team ] + else + prev, + std.objectFields(teams), + [], + ); + if std.length(teamsForApp) == 0 then + null + else if std.length(teamsForApp) == 1 then + teamsForApp[0] + else + error 'application `%s` is in multiple teams: %s' % [ app, std.join(', ', teamsForApp) ]; + + local teams = std.prune(std.map(lookup, self.appKeys(app, true))); + + if std.length(teams) > 0 then + teams[0] + else if std.objectHas(params, 'syn') && std.objectHas(params.syn, 'owner') then + params.syn.owner, + /** * \brief Function to render rules defined in the hierarchy * diff --git a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml index 4c71f782..45ddc410 100644 --- a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ 
b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -38,7 +38,7 @@ stringData: - "continue": true "matchers": - "syn_team = \"\"" - - "namespace =~ \"base|overridden|shared-responsibility\"" + - "namespace =~ \"base|overridden\"" "receiver": "team_default_chubby-cockroaches" - "continue": true "matchers": @@ -48,12 +48,12 @@ stringData: - "continue": true "matchers": - "syn_team = \"\"" - - "namespace =~ \"ns-object|shared-responsibility|same-ns\"" + - "namespace =~ \"ns-object|same-ns\"" "receiver": "team_default_lovable-lizards" - "continue": false "matchers": - "syn_team = \"\"" - - "namespace =~ \"base|overridden|shared-responsibility|ns-string|same-ns|ns-object|shared-responsibility|same-ns\"" + - "namespace =~ \"base|overridden|ns-string|same-ns|ns-object|same-ns\"" "receiver": "__component_openshift4_monitoring_null" - "matchers": - "other = \"true\"" diff --git a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml index 538e610e..3cd8f160 100644 --- a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml +++ b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml @@ -30,7 +30,7 @@ data: - "continue": true "matchers": - "syn_team = \"\"" - - "namespace =~ \"base|overridden|shared-responsibility\"" + - "namespace =~ \"base|overridden\"" "receiver": "team_default_chubby-cockroaches" - "continue": true "matchers": @@ -40,19 +40,19 @@ data: - "continue": true "matchers": - "syn_team = \"\"" - - "namespace =~ \"ns-object|shared-responsibility|same-ns\"" + - "namespace =~ \"ns-object|same-ns\"" "receiver": "team_default_lovable-lizards" - "continue": false "matchers": - "syn_team = \"\"" - - "namespace =~ 
\"base|overridden|shared-responsibility|ns-string|same-ns|ns-object|shared-responsibility|same-ns\"" + - "namespace =~ \"base|overridden|ns-string|same-ns|ns-object|same-ns\"" "receiver": "__component_openshift4_monitoring_null" - "matchers": - "other = \"true\"" "receiver": "other" - "receiver": "team_default_clumsy-donkeys" applications: '["non-existing","no-ns","ns-string","ns-object","base as ns-in-base","base - as ns-overridden","non-existing as still-non-existing","shared-responsibility","same-ns-1","same-ns-2"]' + as ns-overridden","non-existing as still-non-existing","same-ns-1","same-ns-2"]' apps_without_namespaces: |- - "no-ns" - "non-existing" @@ -67,34 +67,21 @@ data: "ns-string": "ns-string" "same-ns-1": "same-ns" "same-ns-2": "same-ns" - "shared-responsibility": "shared-responsibility" discovered_teams: |- - "base as ns-in-base": - - "chubby-cockroaches" - "base as ns-overridden": - - "chubby-cockroaches" - "no-ns": - - "clumsy-donkeys" - "non-existing": - - "clumsy-donkeys" - "non-existing as still-non-existing": - - "clumsy-donkeys" - "ns-object": - - "lovable-lizards" - "ns-string": - - "clumsy-donkeys" - "same-ns-1": - - "clumsy-donkeys" - "same-ns-2": - - "lovable-lizards" - "shared-responsibility": - - "chubby-cockroaches" - - "lovable-lizards" + "base as ns-in-base": "chubby-cockroaches" + "base as ns-overridden": "chubby-cockroaches" + "no-ns": "clumsy-donkeys" + "non-existing": "clumsy-donkeys" + "non-existing as still-non-existing": "clumsy-donkeys" + "ns-object": "lovable-lizards" + "ns-string": "clumsy-donkeys" + "same-ns-1": "clumsy-donkeys" + "same-ns-2": "lovable-lizards" proposed_routes: |- - "continue": true "matchers": - "syn_team = \"\"" - - "namespace =~ \"base|overridden|shared-responsibility\"" + - "namespace =~ \"base|overridden\"" "receiver": "team_default_chubby-cockroaches" - "continue": true "matchers": @@ -104,12 +91,12 @@ data: - "continue": true "matchers": - "syn_team = \"\"" - - "namespace =~ 
\"ns-object|shared-responsibility|same-ns\"" + - "namespace =~ \"ns-object|same-ns\"" "receiver": "team_default_lovable-lizards" - "continue": false "matchers": - "syn_team = \"\"" - - "namespace =~ \"base|overridden|shared-responsibility|ns-string|same-ns|ns-object|shared-responsibility|same-ns\"" + - "namespace =~ \"base|overridden|ns-string|same-ns|ns-object|same-ns\"" "receiver": "__component_openshift4_monitoring_null" kind: ConfigMap metadata: diff --git a/tests/team-routing.yml b/tests/team-routing.yml index 62806246..d500820b 100644 --- a/tests/team-routing.yml +++ b/tests/team-routing.yml @@ -6,7 +6,6 @@ applications: - base as ns-in-base - base as ns-overridden - non-existing as still-non-existing - - shared-responsibility - same-ns-1 - same-ns-2 @@ -29,11 +28,9 @@ parameters: instances: - ns-in-base - ns-overridden - - shared-responsibility lovable-lizards: instances: - ns-object - - shared-responsibility - same-ns-2 openshift4_monitoring: @@ -80,9 +77,6 @@ parameters: ns_overridden: namespace: overridden - shared_responsibility: - namespace: shared-responsibility - same_ns_1: namespace: same-ns