From 8bf4b7f1ee24e4f5c73abfb92487788a42870366 Mon Sep 17 00:00:00 2001 From: Simon Gerber Date: Wed, 3 Jan 2024 15:55:44 +0100 Subject: [PATCH 1/2] Update golden test outputs Update golden test outputs to match latest upstream manifests --- .../prometheus_rules.yaml | 53 +++++++++++------- .../prometheus_rules.yaml | 53 +++++++++++------- .../prometheus_rules.yaml | 53 +++++++++++------- .../prometheus_rules.yaml | 53 +++++++++++------- .../prometheus_rules.yaml | 53 +++++++++++------- .../prometheus_rules.yaml | 53 +++++++++++------- .../prometheus_rules.yaml | 54 ++++++++++++------- .../prometheus_rules.yaml | 53 +++++++++++------- .../prometheus_rules.yaml | 53 +++++++++++------- 9 files changed, 307 insertions(+), 171 deletions(-) diff --git a/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 97b5219d..c7d2f5a9 100644 --- a/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -856,8 +856,8 @@ spec: rules: - alert: SYN_KubePersistentVolumeErrors annotations: - description: The persistent volume {{ $labels.persistentvolume }} has - status {{ $labels.phase }}. + description: The persistent volume {{ $labels.persistentvolume }} on Cluster + {{ $labels.cluster }} has status {{ $labels.phase }}. summary: PersistentVolume is having issues with provisioning. syn_component: openshift4-monitoring expr: | @@ -870,8 +870,8 @@ spec: - alert: SYN_KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage - }} free. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} is only {{ $value | humanizePercentage }} free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -883,9 +883,9 @@ spec: ) < 0.03 and kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -896,8 +896,8 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to fill up within four days. Currently {{ $value | humanizePercentage - }} is available. + }} on Cluster {{ $labels.cluster }} is expected to fill up within four + days. Currently {{ $value | humanizePercentage }} is available. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -911,9 +911,9 @@ spec: kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -923,8 +923,8 @@ spec: - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage - }} free inodes. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} only has {{ $value | humanizePercentage }} free inodes. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -936,9 +936,9 @@ spec: ) < 0.03 and kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -949,8 +949,9 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to run out of inodes within four days. Currently {{ $value - | humanizePercentage }} of its inodes are free. + }} on Cluster {{ $labels.cluster }} is expected to run out of inodes + within four days. Currently {{ $value | humanizePercentage }} of its + inodes are free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -964,9 +965,9 @@ spec: kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -2172,7 +2173,7 @@ spec: description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.' - summary: Errors while reconciling controller. + summary: Errors while reconciling objects. syn_component: openshift4-monitoring expr: | (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 @@ -2195,6 +2196,20 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - alert: SYN_PrometheusOperatorStatusUpdateErrors + annotations: + description: '{{ $value | humanizePercentage }} of status update operations + failed for {{ $labels.controller }} controller in {{ $labels.namespace + }} namespace.' + summary: Errors while updating objects status. + syn_component: openshift4-monitoring + expr: | + (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + for: 10m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - alert: SYN_PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 97b5219d..c7d2f5a9 100644 --- a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -856,8 +856,8 @@ spec: rules: - alert: SYN_KubePersistentVolumeErrors annotations: - description: The persistent volume {{ $labels.persistentvolume }} has - status {{ $labels.phase }}. + description: The persistent volume {{ $labels.persistentvolume }} on Cluster + {{ $labels.cluster }} has status {{ $labels.phase }}. summary: PersistentVolume is having issues with provisioning. syn_component: openshift4-monitoring expr: | @@ -870,8 +870,8 @@ spec: - alert: SYN_KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage - }} free. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} is only {{ $value | humanizePercentage }} free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -883,9 +883,9 @@ spec: ) < 0.03 and kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -896,8 +896,8 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to fill up within four days. Currently {{ $value | humanizePercentage - }} is available. + }} on Cluster {{ $labels.cluster }} is expected to fill up within four + days. Currently {{ $value | humanizePercentage }} is available. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -911,9 +911,9 @@ spec: kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -923,8 +923,8 @@ spec: - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage - }} free inodes. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} only has {{ $value | humanizePercentage }} free inodes. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -936,9 +936,9 @@ spec: ) < 0.03 and kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -949,8 +949,9 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to run out of inodes within four days. Currently {{ $value - | humanizePercentage }} of its inodes are free. + }} on Cluster {{ $labels.cluster }} is expected to run out of inodes + within four days. Currently {{ $value | humanizePercentage }} of its + inodes are free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -964,9 +965,9 @@ spec: kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -2172,7 +2173,7 @@ spec: description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.' - summary: Errors while reconciling controller. + summary: Errors while reconciling objects. syn_component: openshift4-monitoring expr: | (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 @@ -2195,6 +2196,20 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - alert: SYN_PrometheusOperatorStatusUpdateErrors + annotations: + description: '{{ $value | humanizePercentage }} of status update operations + failed for {{ $labels.controller }} controller in {{ $labels.namespace + }} namespace.' + summary: Errors while updating objects status. + syn_component: openshift4-monitoring + expr: | + (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + for: 10m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - alert: SYN_PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace diff --git a/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 3cc23b8f..33d61947 100644 --- a/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -859,8 +859,8 @@ spec: rules: - alert: SYN_KubePersistentVolumeErrors annotations: - description: The persistent volume {{ $labels.persistentvolume }} has - status {{ $labels.phase }}. + description: The persistent volume {{ $labels.persistentvolume }} on Cluster + {{ $labels.cluster }} has status {{ $labels.phase }}. summary: PersistentVolume is having issues with provisioning. syn_component: openshift4-monitoring expr: | @@ -873,8 +873,8 @@ spec: - alert: SYN_KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage - }} free. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} is only {{ $value | humanizePercentage }} free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -886,9 +886,9 @@ spec: ) < 0.03 and kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -899,8 +899,8 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to fill up within four days. Currently {{ $value | humanizePercentage - }} is available. + }} on Cluster {{ $labels.cluster }} is expected to fill up within four + days. Currently {{ $value | humanizePercentage }} is available. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -914,9 +914,9 @@ spec: kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -926,8 +926,8 @@ spec: - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage - }} free inodes. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} only has {{ $value | humanizePercentage }} free inodes. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -939,9 +939,9 @@ spec: ) < 0.03 and kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -952,8 +952,9 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to run out of inodes within four days. Currently {{ $value - | humanizePercentage }} of its inodes are free. + }} on Cluster {{ $labels.cluster }} is expected to run out of inodes + within four days. Currently {{ $value | humanizePercentage }} of its + inodes are free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -967,9 +968,9 @@ spec: kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -2175,7 +2176,7 @@ spec: description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.' - summary: Errors while reconciling controller. + summary: Errors while reconciling objects. syn_component: openshift4-monitoring expr: | (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 @@ -2198,6 +2199,20 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - alert: SYN_PrometheusOperatorStatusUpdateErrors + annotations: + description: '{{ $value | humanizePercentage }} of status update operations + failed for {{ $labels.controller }} controller in {{ $labels.namespace + }} namespace.' + summary: Errors while updating objects status. + syn_component: openshift4-monitoring + expr: | + (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + for: 10m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - alert: SYN_PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace diff --git a/tests/golden/release-4.12/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/release-4.12/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 11c97564..8c866949 100644 --- a/tests/golden/release-4.12/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/release-4.12/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -835,8 +835,8 @@ spec: rules: - alert: SYN_KubePersistentVolumeErrors annotations: - description: The persistent volume {{ $labels.persistentvolume }} has - status {{ $labels.phase }}. + description: The persistent volume {{ $labels.persistentvolume }} on Cluster + {{ $labels.cluster }} has status {{ $labels.phase }}. summary: PersistentVolume is having issues with provisioning. syn_component: openshift4-monitoring expr: | @@ -849,8 +849,8 @@ spec: - alert: SYN_KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage - }} free. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} is only {{ $value | humanizePercentage }} free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -862,9 +862,9 @@ spec: ) < 0.03 and kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -875,8 +875,8 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to fill up within four days. Currently {{ $value | humanizePercentage - }} is available. + }} on Cluster {{ $labels.cluster }} is expected to fill up within four + days. Currently {{ $value | humanizePercentage }} is available. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -890,9 +890,9 @@ spec: kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -902,8 +902,8 @@ spec: - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage - }} free inodes. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} only has {{ $value | humanizePercentage }} free inodes. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -915,9 +915,9 @@ spec: ) < 0.03 and kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -928,8 +928,9 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to run out of inodes within four days. Currently {{ $value - | humanizePercentage }} of its inodes are free. + }} on Cluster {{ $labels.cluster }} is expected to run out of inodes + within four days. Currently {{ $value | humanizePercentage }} of its + inodes are free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -943,9 +944,9 @@ spec: kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -2170,7 +2171,7 @@ spec: description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.' - summary: Errors while reconciling controller. + summary: Errors while reconciling objects. syn_component: openshift4-monitoring expr: | (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 @@ -2193,6 +2194,20 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - alert: SYN_PrometheusOperatorStatusUpdateErrors + annotations: + description: '{{ $value | humanizePercentage }} of status update operations + failed for {{ $labels.controller }} controller in {{ $labels.namespace + }} namespace.' + summary: Errors while updating objects status. + syn_component: openshift4-monitoring + expr: | + (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + for: 10m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - alert: SYN_PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace diff --git a/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 97b5219d..c7d2f5a9 100644 --- a/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -856,8 +856,8 @@ spec: rules: - alert: SYN_KubePersistentVolumeErrors annotations: - description: The persistent volume {{ $labels.persistentvolume }} has - status {{ $labels.phase }}. + description: The persistent volume {{ $labels.persistentvolume }} on Cluster + {{ $labels.cluster }} has status {{ $labels.phase }}. summary: PersistentVolume is having issues with provisioning. syn_component: openshift4-monitoring expr: | @@ -870,8 +870,8 @@ spec: - alert: SYN_KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage - }} free. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} is only {{ $value | humanizePercentage }} free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -883,9 +883,9 @@ spec: ) < 0.03 and kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -896,8 +896,8 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to fill up within four days. Currently {{ $value | humanizePercentage - }} is available. + }} on Cluster {{ $labels.cluster }} is expected to fill up within four + days. Currently {{ $value | humanizePercentage }} is available. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -911,9 +911,9 @@ spec: kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -923,8 +923,8 @@ spec: - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage - }} free inodes. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} only has {{ $value | humanizePercentage }} free inodes. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -936,9 +936,9 @@ spec: ) < 0.03 and kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -949,8 +949,9 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to run out of inodes within four days. Currently {{ $value - | humanizePercentage }} of its inodes are free. + }} on Cluster {{ $labels.cluster }} is expected to run out of inodes + within four days. Currently {{ $value | humanizePercentage }} of its + inodes are free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -964,9 +965,9 @@ spec: kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -2172,7 +2173,7 @@ spec: description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.' - summary: Errors while reconciling controller. + summary: Errors while reconciling objects. syn_component: openshift4-monitoring expr: | (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 @@ -2195,6 +2196,20 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - alert: SYN_PrometheusOperatorStatusUpdateErrors + annotations: + description: '{{ $value | humanizePercentage }} of status update operations + failed for {{ $labels.controller }} controller in {{ $labels.namespace + }} namespace.' + summary: Errors while updating objects status. + syn_component: openshift4-monitoring + expr: | + (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + for: 10m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - alert: SYN_PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace diff --git a/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index 97b5219d..c7d2f5a9 100644 --- a/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -856,8 +856,8 @@ spec: rules: - alert: SYN_KubePersistentVolumeErrors annotations: - description: The persistent volume {{ $labels.persistentvolume }} has - status {{ $labels.phase }}. + description: The persistent volume {{ $labels.persistentvolume }} on Cluster + {{ $labels.cluster }} has status {{ $labels.phase }}. summary: PersistentVolume is having issues with provisioning. syn_component: openshift4-monitoring expr: | @@ -870,8 +870,8 @@ spec: - alert: SYN_KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage - }} free. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} is only {{ $value | humanizePercentage }} free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -883,9 +883,9 @@ spec: ) < 0.03 and kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -896,8 +896,8 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to fill up within four days. Currently {{ $value | humanizePercentage - }} is available. + }} on Cluster {{ $labels.cluster }} is expected to fill up within four + days. Currently {{ $value | humanizePercentage }} is available. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -911,9 +911,9 @@ spec: kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -923,8 +923,8 @@ spec: - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage - }} free inodes. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} only has {{ $value | humanizePercentage }} free inodes. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -936,9 +936,9 @@ spec: ) < 0.03 and kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -949,8 +949,9 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to run out of inodes within four days. Currently {{ $value - | humanizePercentage }} of its inodes are free. + }} on Cluster {{ $labels.cluster }} is expected to run out of inodes + within four days. Currently {{ $value | humanizePercentage }} of its + inodes are free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -964,9 +965,9 @@ spec: kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -2172,7 +2173,7 @@ spec: description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.' - summary: Errors while reconciling controller. + summary: Errors while reconciling objects. syn_component: openshift4-monitoring expr: | (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 @@ -2195,6 +2196,20 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - alert: SYN_PrometheusOperatorStatusUpdateErrors + annotations: + description: '{{ $value | humanizePercentage }} of status update operations + failed for {{ $labels.controller }} controller in {{ $labels.namespace + }} namespace.' + summary: Errors while updating objects status. + syn_component: openshift4-monitoring + expr: | + (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + for: 10m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - alert: SYN_PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace diff --git a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index e38d46e5..cdd38e5f 100644 --- a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -900,8 +900,8 @@ spec: rules: - alert: SYN_KubePersistentVolumeErrors annotations: - description: The persistent volume {{ $labels.persistentvolume }} has - status {{ $labels.phase }}. + description: The persistent volume {{ $labels.persistentvolume }} on Cluster + {{ $labels.cluster }} has status {{ $labels.phase }}. summary: PersistentVolume is having issues with provisioning. syn_component: openshift4-monitoring expr: | @@ -915,8 +915,8 @@ spec: - alert: SYN_KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage - }} free. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} is only {{ $value | humanizePercentage }} free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -928,9 +928,9 @@ spec: ) < 0.03 and kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -942,8 +942,8 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to fill up within four days. Currently {{ $value | humanizePercentage - }} is available. + }} on Cluster {{ $labels.cluster }} is expected to fill up within four + days. Currently {{ $value | humanizePercentage }} is available. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -957,9 +957,9 @@ spec: kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -970,8 +970,8 @@ spec: - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage - }} free inodes. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} only has {{ $value | humanizePercentage }} free inodes. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -983,9 +983,9 @@ spec: ) < 0.03 and kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -997,8 +997,9 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to run out of inodes within four days. Currently {{ $value - | humanizePercentage }} of its inodes are free. + }} on Cluster {{ $labels.cluster }} is expected to run out of inodes + within four days. Currently {{ $value | humanizePercentage }} of its + inodes are free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -1012,9 +1013,9 @@ spec: kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -2295,7 +2296,7 @@ spec: description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.' - summary: Errors while reconciling controller. + summary: Errors while reconciling objects. syn_component: openshift4-monitoring expr: | (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 @@ -2320,6 +2321,21 @@ spec: syn: 'true' syn_component: openshift4-monitoring syn_team: clumsy-donkeys + - alert: SYN_PrometheusOperatorStatusUpdateErrors + annotations: + description: '{{ $value | humanizePercentage }} of status update operations + failed for {{ $labels.controller }} controller in {{ $labels.namespace + }} namespace.' + summary: Errors while updating objects status. + syn_component: openshift4-monitoring + expr: | + (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + for: 10m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-monitoring + syn_team: clumsy-donkeys - alert: SYN_PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace diff --git a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index df2a1170..e741ffb7 100644 --- a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -856,8 +856,8 @@ spec: rules: - alert: SYN_KubePersistentVolumeErrors annotations: - description: The persistent volume {{ $labels.persistentvolume }} has - status {{ $labels.phase }}. + description: The persistent volume {{ $labels.persistentvolume }} on Cluster + {{ $labels.cluster }} has status {{ $labels.phase }}. summary: PersistentVolume is having issues with provisioning. syn_component: openshift4-monitoring expr: | @@ -870,8 +870,8 @@ spec: - alert: SYN_KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage - }} free. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} is only {{ $value | humanizePercentage }} free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -883,9 +883,9 @@ spec: ) < 0.03 and kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -896,8 +896,8 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to fill up within four days. Currently {{ $value | humanizePercentage - }} is available. + }} on Cluster {{ $labels.cluster }} is expected to fill up within four + days. Currently {{ $value | humanizePercentage }} is available. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -911,9 +911,9 @@ spec: kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -923,8 +923,8 @@ spec: - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage - }} free inodes. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} only has {{ $value | humanizePercentage }} free inodes. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -936,9 +936,9 @@ spec: ) < 0.03 and kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -949,8 +949,9 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to run out of inodes within four days. Currently {{ $value - | humanizePercentage }} of its inodes are free. + }} on Cluster {{ $labels.cluster }} is expected to run out of inodes + within four days. Currently {{ $value | humanizePercentage }} of its + inodes are free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -964,9 +965,9 @@ spec: kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -2172,7 +2173,7 @@ spec: description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.' - summary: Errors while reconciling controller. + summary: Errors while reconciling objects. syn_component: openshift4-monitoring expr: | (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 @@ -2195,6 +2196,20 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - alert: SYN_PrometheusOperatorStatusUpdateErrors + annotations: + description: '{{ $value | humanizePercentage }} of status update operations + failed for {{ $labels.controller }} controller in {{ $labels.namespace + }} namespace.' + summary: Errors while updating objects status. + syn_component: openshift4-monitoring + expr: | + (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + for: 10m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - alert: SYN_PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace diff --git a/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml index c13475f1..70103116 100644 --- a/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml +++ b/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml @@ -856,8 +856,8 @@ spec: rules: - alert: SYN_KubePersistentVolumeErrors annotations: - description: The persistent volume {{ $labels.persistentvolume }} has - status {{ $labels.phase }}. + description: The persistent volume {{ $labels.persistentvolume }} on Cluster + {{ $labels.cluster }} has status {{ $labels.phase }}. summary: PersistentVolume is having issues with provisioning. syn_component: openshift4-monitoring expr: | @@ -870,8 +870,8 @@ spec: - alert: SYN_KubePersistentVolumeFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage - }} free. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} is only {{ $value | humanizePercentage }} free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -883,9 +883,9 @@ spec: ) < 0.03 and kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -896,8 +896,8 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to fill up within four days. Currently {{ $value | humanizePercentage - }} is available. + }} on Cluster {{ $labels.cluster }} is expected to fill up within four + days. Currently {{ $value | humanizePercentage }} is available. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md summary: PersistentVolume is filling up. syn_component: openshift4-monitoring @@ -911,9 +911,9 @@ spec: kubelet_volume_stats_used_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_available_bytes{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -923,8 +923,8 @@ spec: - alert: SYN_KubePersistentVolumeInodesFillingUp annotations: description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim - }} in Namespace {{ $labels.namespace }} only has {{ $value | humanizePercentage - }} free inodes. + }} in Namespace {{ $labels.namespace }} on Cluster {{ $labels.cluster + }} only has {{ $value | humanizePercentage }} free inodes. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -936,9 +936,9 @@ spec: ) < 0.03 and kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1m labels: @@ -949,8 +949,9 @@ spec: annotations: description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim }} in Namespace {{ $labels.namespace - }} is expected to run out of inodes within four days. Currently {{ $value - | humanizePercentage }} of its inodes are free. + }} on Cluster {{ $labels.cluster }} is expected to run out of inodes + within four days. Currently {{ $value | humanizePercentage }} of its + inodes are free. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeInodesFillingUp.md summary: PersistentVolumeInodes are filling up. syn_component: openshift4-monitoring @@ -964,9 +965,9 @@ spec: kubelet_volume_stats_inodes_used{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"} > 0 and predict_linear(kubelet_volume_stats_inodes_free{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kubelet", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)", access_mode="ReadOnlyMany"} == 1 - unless on(namespace, persistentvolumeclaim) + unless on(cluster, namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",label_alerts_k8s_io_kube_persistent_volume_filling_up="disabled"} == 1 for: 1h labels: @@ -2172,7 +2173,7 @@ spec: description: '{{ $value | humanizePercentage }} of reconciling operations failed for {{ $labels.controller }} controller in {{ $labels.namespace }} namespace.' - summary: Errors while reconciling controller. + summary: Errors while reconciling objects. syn_component: openshift4-monitoring expr: | (sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 @@ -2195,6 +2196,20 @@ spec: severity: warning syn: 'true' syn_component: openshift4-monitoring + - alert: SYN_PrometheusOperatorStatusUpdateErrors + annotations: + description: '{{ $value | humanizePercentage }} of status update operations + failed for {{ $labels.controller }} controller in {{ $labels.namespace + }} namespace.' + summary: Errors while updating objects status. + syn_component: openshift4-monitoring + expr: | + (sum by (controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="prometheus-operator", namespace=~"openshift-monitoring|openshift-user-workload-monitoring"}[5m]))) > 0.1 + for: 10m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-monitoring - alert: SYN_PrometheusOperatorSyncFailed annotations: description: Controller {{ $labels.controller }} in {{ $labels.namespace From e13c156b6d3b9716c5a2f7d2b6db565fb9d764a7 Mon Sep 17 00:00:00 2001 From: Simon Gerber Date: Wed, 3 Jan 2024 15:44:06 +0100 Subject: [PATCH 2/2] Prune fields with `null` values in user-provided Alertmanager config This allows us to gradually roll out Alertmanager configs with new fields set (e.g. `opsgenie_configs.source` in a receiver config) even though the receiver config is formatted as a list which isn't easily editable in the hierarchy. --- component/alert-routing-discovery.libsonnet | 2 +- docs/modules/ROOT/pages/references/parameters.adoc | 2 ++ .../openshift4-monitoring/10_alertmanager_config.yaml | 1 + .../openshift4-monitoring/99_discovery_debug_cm.yaml | 1 + tests/team-routing.yml | 4 ++++ 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/component/alert-routing-discovery.libsonnet b/component/alert-routing-discovery.libsonnet index 188d92c4..97519a03 100644 --- a/component/alert-routing-discovery.libsonnet +++ b/component/alert-routing-discovery.libsonnet @@ -73,7 +73,7 @@ local teamBasedRouting = std.map( local alertmanagerConfig = local routes = std.get(params.openshift4_monitoring.alertManagerConfig.route, 'routes', []); - params.openshift4_monitoring.alertManagerConfig { + std.prune(params.openshift4_monitoring.alertManagerConfig) { receivers+: [ { name: nullReceiver } ], route+: { routes: adParams.prepend_routes + teamBasedRouting + adParams.append_routes + routes + if ownerOrFallbackTeam != null then [ { diff --git a/docs/modules/ROOT/pages/references/parameters.adoc b/docs/modules/ROOT/pages/references/parameters.adoc index e95f362d..424948dc 100644 --- a/docs/modules/ROOT/pages/references/parameters.adoc +++ b/docs/modules/ROOT/pages/references/parameters.adoc @@ -268,6 +268,8 @@ A dictionary holding the configuration for the AlertManager. See the https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html#applying-custom-alertmanager-configuration_managing-alerts[OpenShift docs] for available parameters. +The component will silently drop any fields in the provided config which are empty. +The component treats `null` as empty for scalar fields. == `alertManagerAutoDiscovery` diff --git a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml index c011ad8a..d56e4c9a 100644 --- a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml +++ b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/10_alertmanager_config.yaml @@ -25,6 +25,7 @@ stringData: "target_match_re": "severity": "info" "receivers": + - "name": "foo" - "name": "__component_openshift4_monitoring_null" "route": "group_interval": "5s" diff --git a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml index 8a94f1bc..b7f904ec 100644 --- a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml +++ b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/99_discovery_debug_cm.yaml @@ -17,6 +17,7 @@ data: "target_match_re": "severity": "info" "receivers": + - "name": "foo" - "name": "__component_openshift4_monitoring_null" "route": "group_interval": "5s" diff --git a/tests/team-routing.yml b/tests/team-routing.yml index 39cf0a9b..8baaae54 100644 --- a/tests/team-routing.yml +++ b/tests/team-routing.yml @@ -38,6 +38,10 @@ parameters: openshift4_monitoring: alertManagerConfig: + receivers: + - name: foo + opsgenie_configs: + - source: null route: routes: - receiver: other