From 2dd550e1db687590abca17d5366074599a31a6bd Mon Sep 17 00:00:00 2001
From: Adrian Haas <haasad@users.noreply.github.com>
Date: Fri, 19 Jul 2024 14:07:28 +0200
Subject: [PATCH] Add support for OCP 4.15

---
 class/defaults.yml                            | 17 +++----
 class/openshift4-monitoring.yml               | 43 ++++++++---------
 .../prometheus_rules.yaml                     | 47 ++++++++++++++----
 .../openshift4-monitoring/silence.yaml        |  2 +-
 .../prometheus_rules.yaml                     | 47 ++++++++++++++----
 .../openshift4-monitoring/silence.yaml        |  2 +-
 .../openshift4-monitoring/cronjobs.yaml       |  2 +-
 .../prometheus_rules.yaml                     |  2 +-
 .../openshift4-monitoring/silence.yaml        |  2 +-
 .../prometheus_rules.yaml                     | 47 ++++++++++++++----
 .../openshift4-monitoring/silence.yaml        |  2 +-
 .../prometheus_rules.yaml                     |  2 +-
 .../openshift4-monitoring/silence.yaml        |  2 +-
 .../prometheus_rules.yaml                     | 18 ++++---
 .../openshift4-monitoring/silence.yaml        |  2 +-
 .../prometheus_rules.yaml                     | 47 ++++++++++++++----
 .../openshift4-monitoring/silence.yaml        |  2 +-
 .../prometheus_rules.yaml                     | 48 +++++++++++++++----
 .../openshift4-monitoring/silence.yaml        |  2 +-
 .../prometheus_rules.yaml                     | 47 ++++++++++++++----
 .../openshift4-monitoring/silence.yaml        |  2 +-
 .../prometheus_rules.yaml                     | 47 ++++++++++++++----
 .../openshift4-monitoring/silence.yaml        |  2 +-
 23 files changed, 310 insertions(+), 124 deletions(-)

diff --git a/class/defaults.yml b/class/defaults.yml
index 8a62e243..3347ed68 100644
--- a/class/defaults.yml
+++ b/class/defaults.yml
@@ -7,21 +7,15 @@ parameters:
         prom.libsonnet: openshift4-monitoring-prom.libsonnet
         alert-patching.libsonnet: openshift4-monitoring-alert-patching.libsonnet
     namespace: openshift-monitoring
-    # TODO: select based on reported OCP version once we have dynamic facts
-    manifests_version: release-4.14
-    =_cluster_monitoring_operator_version_map:
-      release-4.13: release-4.13
-      release-4.14: release-4.14
-    =_etcd_operator_version_map:
-      release-4.13: release-4.13
-      release-4.14: release-4.14
+    manifests_version: release-4.15
     # no release branches newer than 4.9 exist
     =_operator_lifecycle_manager_map:
       release-4.13: release-4.9
       release-4.14: release-4.9
+      release-4.15: release-4.9
     jsonnetfile_parameters:
-      cmo_version: ${openshift4_monitoring:_cluster_monitoring_operator_version_map:${openshift4_monitoring:manifests_version}}
-      etcd_version: ${openshift4_monitoring:_etcd_operator_version_map:${openshift4_monitoring:manifests_version}}
+      cmo_version: ${openshift4_monitoring:manifests_version}
+      etcd_version: ${openshift4_monitoring:manifests_version}
     defaultConfig:
       nodeSelector:
         node-role.kubernetes.io/infra: ''
@@ -211,6 +205,7 @@ parameters:
             expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > on (instance) (count by (instance) (node_cpu_info{}) * 100)
         release-4.13: {}
         release-4.14: {}
+        release-4.15: {}
       # Alerts to ignore for user workload monitoring
       ignoreUserWorkload: []
 
@@ -237,7 +232,7 @@ parameters:
     images:
       oc:
         image: quay.io/appuio/oc
-        tag: v4.14
+        tag: v4.15
       node_exporter:
         registry: quay.io
         repository: prometheus/node-exporter
diff --git a/class/openshift4-monitoring.yml b/class/openshift4-monitoring.yml
index 3d7b7066..4dc1b1f5 100644
--- a/class/openshift4-monitoring.yml
+++ b/class/openshift4-monitoring.yml
@@ -2,14 +2,6 @@ parameters:
   openshift4_monitoring:
     =_manifest_urls:
       kube-apiserver:
-        release-4.11:
-          api-usage: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.11/bindata/assets/alerts/api-usage.yaml
-          cpu-utilization: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.11/bindata/assets/alerts/cpu-utilization.yaml
-          slos: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.11/bindata/assets/alerts/kube-apiserver-slos-basic.yaml
-        release-4.12:
-          api-usage: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.12/bindata/assets/alerts/api-usage.yaml
-          cpu-utilization: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.12/bindata/assets/alerts/cpu-utilization.yaml
-          slos: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.12/bindata/assets/alerts/kube-apiserver-slos-basic.yaml
         release-4.13:
           api-usage: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.13/bindata/assets/alerts/api-usage.yaml
           cpu-utilization: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.13/bindata/assets/alerts/cpu-utilization.yaml
@@ -18,28 +10,20 @@ parameters:
           api-usage: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.14/bindata/assets/alerts/api-usage.yaml
           cpu-utilization: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.14/bindata/assets/alerts/cpu-utilization.yaml
           slos: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.14/bindata/assets/alerts/kube-apiserver-slos-basic.yaml
+        release-4.15:
+          api-usage: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.15/bindata/assets/alerts/api-usage.yaml
+          cpu-utilization: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.15/bindata/assets/alerts/cpu-utilization.yaml
+          slos: https://raw.githubusercontent.com/openshift/cluster-kube-apiserver-operator/release-4.15/bindata/assets/alerts/kube-apiserver-slos-basic.yaml
 
       machine-api-operator:
-        release-4.11:
-          prometheus: https://raw.githubusercontent.com/openshift/machine-api-operator/release-4.11/install/0000_90_machine-api-operator_04_alertrules.yaml
-        release-4.12:
-          prometheus: https://raw.githubusercontent.com/openshift/machine-api-operator/release-4.12/install/0000_90_machine-api-operator_04_alertrules.yaml
         release-4.13:
           prometheus: https://raw.githubusercontent.com/openshift/machine-api-operator/release-4.13/install/0000_90_machine-api-operator_04_alertrules.yaml
         release-4.14:
           prometheus: https://raw.githubusercontent.com/openshift/machine-api-operator/release-4.14/install/0000_90_machine-api-operator_04_alertrules.yaml
+        release-4.15:
+          prometheus: https://raw.githubusercontent.com/openshift/machine-api-operator/release-4.15/install/0000_90_machine-api-operator_04_alertrules.yaml
 
       ovn-kubernetes:
-        release-4.11:
-          common: https://raw.githubusercontent.com/openshift/cluster-network-operator/${openshift4_monitoring:manifests_version}/bindata/network/ovn-kubernetes/common/alert-rules.yaml
-          # We use the "self-hosted" variant of the control-plane alerts, so
-          # we don't have to worry about unresolved gotemplate references.
-          control_plane: https://raw.githubusercontent.com/openshift/cluster-network-operator/${openshift4_monitoring:manifests_version}/bindata/network/ovn-kubernetes/self-hosted/alert-rules-control-plane.yaml
-        release-4.12:
-          common: https://raw.githubusercontent.com/openshift/cluster-network-operator/${openshift4_monitoring:manifests_version}/bindata/network/ovn-kubernetes/common/alert-rules.yaml
-          # We use the "self-hosted" variant of the control-plane alerts, so
-          # we don't have to worry about unresolved gotemplate references.
-          control_plane: https://raw.githubusercontent.com/openshift/cluster-network-operator/${openshift4_monitoring:manifests_version}/bindata/network/ovn-kubernetes/self-hosted/alert-rules-control-plane.yaml
         release-4.13:
           common: https://raw.githubusercontent.com/openshift/cluster-network-operator/${openshift4_monitoring:manifests_version}/bindata/network/ovn-kubernetes/common/alert-rules.yaml
           # We use the "self-hosted" variant of the control-plane alerts, so
@@ -56,11 +40,24 @@ parameters:
           # when selecting OVNKubernetes as the network plugin during
           # installation.
           control_plane: https://raw.githubusercontent.com/openshift/cluster-network-operator/release-4.14/bindata/network/ovn-kubernetes/self-hosted/multi-zone-interconnect/alert-rules-control-plane.yaml
+        release-4.15:
+          common: https://raw.githubusercontent.com/openshift/cluster-network-operator/${openshift4_monitoring:manifests_version}/bindata/network/ovn-kubernetes/common/alert-rules.yaml
+          # We handle the gotemplate stuff in Jsonnet for now, since Jinja
+          # can't deal with gotemplate expressions like `{{.OvnkubeMasterReplicas}}`.
+          # The only templates that are in the alerting rules can be handled
+          # with a simple string replace.
+          control_plane: https://raw.githubusercontent.com/openshift/cluster-network-operator/release-4.15/bindata/network/ovn-kubernetes/self-hosted/alert-rules-control-plane.yaml
+
+      cloud-credential-operator:
+        release-4.13: https://raw.githubusercontent.com/openshift/cloud-credential-operator/release-4.13/manifests/0000_90_cloud-credential-operator_04_alertrules.yaml
+        release-4.14: https://raw.githubusercontent.com/openshift/cloud-credential-operator/release-4.14/manifests/0000_90_cloud-credential-operator_04_alertrules.yaml
+        release-4.15: https://raw.githubusercontent.com/openshift/cloud-credential-operator/release-4.15/manifests/0000_90_cloud-credential-operator_03_alertrules.yaml
+
 
   kapitan:
     dependencies:
       - type: https
-        source: https://raw.githubusercontent.com/openshift/cloud-credential-operator/${openshift4_monitoring:manifests_version}/manifests/0000_90_cloud-credential-operator_04_alertrules.yaml
+        source: ${openshift4_monitoring:_manifest_urls:cloud-credential-operator:${openshift4_monitoring:manifests_version}}
         output_path: dependencies/openshift4-monitoring/manifests/${openshift4_monitoring:manifests_version}/cloud-credential-operator.yaml
       # Download cluster-version-operator rules YAML to folder
       # `manifests_requiring_prerendering/`, because we cannot prerender
diff --git a/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
index b8f2fdad..94eda30e 100644
--- a/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
+++ b/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
@@ -221,6 +221,25 @@ spec:
             syn_component: openshift4-monitoring
     - name: syn-cluster-operators
       rules:
+        - alert: SYN_CannotEvaluateConditionalUpdates
+          annotations:
+            description: Failure to evaluate conditional update matches means that
+              Cluster Version Operator cannot decide whether an update path is recommended
+              or not.
+            summary: Cluster Version Operator cannot evaluate conditional update matches
+              for {{ $value | humanizeDuration }}.
+            syn_component: openshift4-monitoring
+          expr: |
+            max by (version, condition, status, reason)
+            (
+              (
+                time()-cluster_version_conditional_update_condition_seconds{condition="Recommended", status="Unknown"}
+              ) >= 3600
+            )
+          labels:
+            severity: warning
+            syn: 'true'
+            syn_component: openshift4-monitoring
         - alert: SYN_ClusterOperatorDegraded
           annotations:
             description: The {{ $labels.name }} operator is degraded because {{ $labels.reason
@@ -392,13 +411,17 @@ spec:
             syn_component: openshift4-monitoring
         - alert: SYN_HighOverallControlPlaneCPU
           annotations:
-            description: |-
-              On a multi-node cluster with three control plane nodes, the overall CPU utilization may only be about 2/3 of all available capacity. This is because if a single control plane node fails, the remaining two must handle the load of the cluster in order to be HA. If the cluster is using more than 2/3 of all capacity, if one control plane node fails, the remaining two are likely to fail when they take the load. To fix this, increase the CPU and memory on your control plane nodes.
-              On a single node OpenShift (SNO) cluster, this alert will also fire if the 2/3 of the CPU cores of the node are in use by any workload. This level of CPU utlization of an SNO cluster is probably not a problem under most circumstances, but high levels of utilization may result in degraded performance. To manage this alert or silence it in case of false positives see the following link:  https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html
+            description: Given three control plane nodes, the overall CPU utilization
+              may only be about 2/3 of all available capacity. This is because if
+              a single control plane node fails, the remaining two must handle the
+              load of the cluster in order to be HA. If the cluster is using more
+              than 2/3 of all capacity, if one control plane node fails, the remaining
+              two are likely to fail when they take the load. To fix this, increase
+              the CPU and memory on your control plane nodes.
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/ExtremelyHighIndividualControlPlaneCPU.md
-            summary: CPU utilization across all control plane nodes is more than 60%
-              of the total available CPU. Control plane node outage may cause a cascading
-              failure; increase available CPU.
+            summary: CPU utilization across all three control plane nodes is higher
+              than two control plane nodes can sustain; a single control plane node
+              outage may cause a cascading failure; increase available CPU.
             syn_component: openshift4-monitoring
           expr: |
             sum(
@@ -426,7 +449,7 @@ spec:
             summary: etcd cluster database is running full.
             syn_component: openshift4-monitoring
           expr: |
-            (last_over_time(etcd_mvcc_db_total_size_in_bytes[5m]) / last_over_time(etcd_server_quota_backend_bytes[5m]))*100 > 95
+            (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95
           for: 10m
           labels:
             severity: critical
@@ -829,7 +852,7 @@ spec:
             syn_component: openshift4-monitoring
           expr: |
             (
-              max without (revision) (
+              max by(namespace, statefulset) (
                 kube_statefulset_status_current_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
                   unless
                 kube_statefulset_status_update_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
@@ -1352,10 +1375,12 @@ spec:
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeClockNotSynchronising.md
             summary: Clock not synchronising.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
             and
             node_timex_maxerror_seconds{job="node-exporter"} >= 16
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: critical
@@ -1367,7 +1392,8 @@ spec:
               0.05s. Ensure NTP is configured correctly on this host.
             summary: Clock skew detected.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             (
               node_timex_offset_seconds{job="node-exporter"} > 0.05
             and
@@ -1379,6 +1405,7 @@ spec:
             and
               deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
             )
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: warning
diff --git a/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/silence.yaml b/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/silence.yaml
index c3e45f77..ccae3b65 100644
--- a/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/silence.yaml
+++ b/tests/golden/capacity-alerts-with-node-labels/openshift4-monitoring/openshift4-monitoring/silence.yaml
@@ -63,7 +63,7 @@ spec:
                     configMapKeyRef:
                       key: silences.json
                       name: silence
-              image: quay.io/appuio/oc:v4.14
+              image: quay.io/appuio/oc:v4.15
               imagePullPolicy: IfNotPresent
               name: silence
               ports: []
diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
index b8f2fdad..94eda30e 100644
--- a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
+++ b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
@@ -221,6 +221,25 @@ spec:
             syn_component: openshift4-monitoring
     - name: syn-cluster-operators
       rules:
+        - alert: SYN_CannotEvaluateConditionalUpdates
+          annotations:
+            description: Failure to evaluate conditional update matches means that
+              Cluster Version Operator cannot decide whether an update path is recommended
+              or not.
+            summary: Cluster Version Operator cannot evaluate conditional update matches
+              for {{ $value | humanizeDuration }}.
+            syn_component: openshift4-monitoring
+          expr: |
+            max by (version, condition, status, reason)
+            (
+              (
+                time()-cluster_version_conditional_update_condition_seconds{condition="Recommended", status="Unknown"}
+              ) >= 3600
+            )
+          labels:
+            severity: warning
+            syn: 'true'
+            syn_component: openshift4-monitoring
         - alert: SYN_ClusterOperatorDegraded
           annotations:
             description: The {{ $labels.name }} operator is degraded because {{ $labels.reason
@@ -392,13 +411,17 @@ spec:
             syn_component: openshift4-monitoring
         - alert: SYN_HighOverallControlPlaneCPU
           annotations:
-            description: |-
-              On a multi-node cluster with three control plane nodes, the overall CPU utilization may only be about 2/3 of all available capacity. This is because if a single control plane node fails, the remaining two must handle the load of the cluster in order to be HA. If the cluster is using more than 2/3 of all capacity, if one control plane node fails, the remaining two are likely to fail when they take the load. To fix this, increase the CPU and memory on your control plane nodes.
-              On a single node OpenShift (SNO) cluster, this alert will also fire if the 2/3 of the CPU cores of the node are in use by any workload. This level of CPU utlization of an SNO cluster is probably not a problem under most circumstances, but high levels of utilization may result in degraded performance. To manage this alert or silence it in case of false positives see the following link:  https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html
+            description: Given three control plane nodes, the overall CPU utilization
+              may only be about 2/3 of all available capacity. This is because if
+              a single control plane node fails, the remaining two must handle the
+              load of the cluster in order to be HA. If the cluster is using more
+              than 2/3 of all capacity, if one control plane node fails, the remaining
+              two are likely to fail when they take the load. To fix this, increase
+              the CPU and memory on your control plane nodes.
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/ExtremelyHighIndividualControlPlaneCPU.md
-            summary: CPU utilization across all control plane nodes is more than 60%
-              of the total available CPU. Control plane node outage may cause a cascading
-              failure; increase available CPU.
+            summary: CPU utilization across all three control plane nodes is higher
+              than two control plane nodes can sustain; a single control plane node
+              outage may cause a cascading failure; increase available CPU.
             syn_component: openshift4-monitoring
           expr: |
             sum(
@@ -426,7 +449,7 @@ spec:
             summary: etcd cluster database is running full.
             syn_component: openshift4-monitoring
           expr: |
-            (last_over_time(etcd_mvcc_db_total_size_in_bytes[5m]) / last_over_time(etcd_server_quota_backend_bytes[5m]))*100 > 95
+            (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95
           for: 10m
           labels:
             severity: critical
@@ -829,7 +852,7 @@ spec:
             syn_component: openshift4-monitoring
           expr: |
             (
-              max without (revision) (
+              max by(namespace, statefulset) (
                 kube_statefulset_status_current_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
                   unless
                 kube_statefulset_status_update_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
@@ -1352,10 +1375,12 @@ spec:
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeClockNotSynchronising.md
             summary: Clock not synchronising.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
             and
             node_timex_maxerror_seconds{job="node-exporter"} >= 16
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: critical
@@ -1367,7 +1392,8 @@ spec:
               0.05s. Ensure NTP is configured correctly on this host.
             summary: Clock skew detected.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             (
               node_timex_offset_seconds{job="node-exporter"} > 0.05
             and
@@ -1379,6 +1405,7 @@ spec:
             and
               deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
             )
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: warning
diff --git a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/silence.yaml b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/silence.yaml
index c3e45f77..ccae3b65 100644
--- a/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/silence.yaml
+++ b/tests/golden/capacity-alerts/openshift4-monitoring/openshift4-monitoring/silence.yaml
@@ -63,7 +63,7 @@ spec:
                     configMapKeyRef:
                       key: silences.json
                       name: silence
-              image: quay.io/appuio/oc:v4.14
+              image: quay.io/appuio/oc:v4.15
               imagePullPolicy: IfNotPresent
               name: silence
               ports: []
diff --git a/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/cronjobs.yaml b/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/cronjobs.yaml
index ac862ee1..296bf7ac 100644
--- a/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/cronjobs.yaml
+++ b/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/cronjobs.yaml
@@ -99,7 +99,7 @@ spec:
               command:
                 - /usr/local/bin/script.sh
               env: []
-              image: quay.io/appuio/oc:v4.14
+              image: quay.io/appuio/oc:v4.15
               imagePullPolicy: IfNotPresent
               name: job
               ports: []
diff --git a/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
index 03c9afc6..9edd5259 100644
--- a/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
+++ b/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
@@ -834,7 +834,7 @@ spec:
             syn_component: openshift4-monitoring
           expr: |
             (
-              max without (revision) (
+              max by(namespace, statefulset) (
                 kube_statefulset_status_current_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)",job="kube-state-metrics"}
                   unless
                 kube_statefulset_status_update_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",namespace!~"(openshift-adp)",job="kube-state-metrics"}
diff --git a/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/silence.yaml b/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/silence.yaml
index c3e45f77..ccae3b65 100644
--- a/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/silence.yaml
+++ b/tests/golden/custom-rules/openshift4-monitoring/openshift4-monitoring/silence.yaml
@@ -63,7 +63,7 @@ spec:
                     configMapKeyRef:
                       key: silences.json
                       name: silence
-              image: quay.io/appuio/oc:v4.14
+              image: quay.io/appuio/oc:v4.15
               imagePullPolicy: IfNotPresent
               name: silence
               ports: []
diff --git a/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
index d4432bde..86455112 100644
--- a/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
+++ b/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
@@ -355,6 +355,25 @@ spec:
             syn_component: openshift4-monitoring
     - name: syn-cluster-operators
       rules:
+        - alert: SYN_CannotEvaluateConditionalUpdates
+          annotations:
+            description: Failure to evaluate conditional update matches means that
+              Cluster Version Operator cannot decide whether an update path is recommended
+              or not.
+            summary: Cluster Version Operator cannot evaluate conditional update matches
+              for {{ $value | humanizeDuration }}.
+            syn_component: openshift4-monitoring
+          expr: |
+            max by (version, condition, status, reason)
+            (
+              (
+                time()-cluster_version_conditional_update_condition_seconds{condition="Recommended", status="Unknown"}
+              ) >= 3600
+            )
+          labels:
+            severity: warning
+            syn: 'true'
+            syn_component: openshift4-monitoring
         - alert: SYN_ClusterOperatorDegraded
           annotations:
             description: The {{ $labels.name }} operator is degraded because {{ $labels.reason
@@ -526,13 +545,17 @@ spec:
             syn_component: openshift4-monitoring
         - alert: SYN_HighOverallControlPlaneCPU
           annotations:
-            description: |-
-              On a multi-node cluster with three control plane nodes, the overall CPU utilization may only be about 2/3 of all available capacity. This is because if a single control plane node fails, the remaining two must handle the load of the cluster in order to be HA. If the cluster is using more than 2/3 of all capacity, if one control plane node fails, the remaining two are likely to fail when they take the load. To fix this, increase the CPU and memory on your control plane nodes.
-              On a single node OpenShift (SNO) cluster, this alert will also fire if the 2/3 of the CPU cores of the node are in use by any workload. This level of CPU utlization of an SNO cluster is probably not a problem under most circumstances, but high levels of utilization may result in degraded performance. To manage this alert or silence it in case of false positives see the following link:  https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html
+            description: Given three control plane nodes, the overall CPU utilization
+              may only be about 2/3 of all available capacity. This is because if
+              a single control plane node fails, the remaining two must handle the
+              load of the cluster in order to be HA. If the cluster is using more
+              than 2/3 of all capacity, if one control plane node fails, the remaining
+              two are likely to fail when they take the load. To fix this, increase
+              the CPU and memory on your control plane nodes.
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/ExtremelyHighIndividualControlPlaneCPU.md
-            summary: CPU utilization across all control plane nodes is more than 60%
-              of the total available CPU. Control plane node outage may cause a cascading
-              failure; increase available CPU.
+            summary: CPU utilization across all three control plane nodes is higher
+              than two control plane nodes can sustain; a single control plane node
+              outage may cause a cascading failure; increase available CPU.
             syn_component: openshift4-monitoring
           expr: |
             sum(
@@ -560,7 +583,7 @@ spec:
             summary: etcd cluster database is running full.
             syn_component: openshift4-monitoring
           expr: |
-            (last_over_time(etcd_mvcc_db_total_size_in_bytes[5m]) / last_over_time(etcd_server_quota_backend_bytes[5m]))*100 > 95
+            (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95
           for: 10m
           labels:
             severity: critical
@@ -963,7 +986,7 @@ spec:
             syn_component: openshift4-monitoring
           expr: |
             (
-              max without (revision) (
+              max by(namespace, statefulset) (
                 kube_statefulset_status_current_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
                   unless
                 kube_statefulset_status_update_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
@@ -1486,10 +1509,12 @@ spec:
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeClockNotSynchronising.md
             summary: Clock not synchronising.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
             and
             node_timex_maxerror_seconds{job="node-exporter"} >= 16
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: critical
@@ -1501,7 +1526,8 @@ spec:
               0.05s. Ensure NTP is configured correctly on this host.
             summary: Clock skew detected.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             (
               node_timex_offset_seconds{job="node-exporter"} > 0.05
             and
@@ -1513,6 +1539,7 @@ spec:
             and
               deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
             )
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: warning
diff --git a/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/silence.yaml b/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/silence.yaml
index c3e45f77..ccae3b65 100644
--- a/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/silence.yaml
+++ b/tests/golden/ovn-kubernetes/openshift4-monitoring/openshift4-monitoring/silence.yaml
@@ -63,7 +63,7 @@ spec:
                     configMapKeyRef:
                       key: silences.json
                       name: silence
-              image: quay.io/appuio/oc:v4.14
+              image: quay.io/appuio/oc:v4.15
               imagePullPolicy: IfNotPresent
               name: silence
               ports: []
diff --git a/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
index a62709c3..ede3d6be 100644
--- a/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
+++ b/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
@@ -831,7 +831,7 @@ spec:
             syn_component: openshift4-monitoring
           expr: |
             (
-              max without (revision) (
+              max by(namespace, statefulset) (
                 kube_statefulset_status_current_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
                   unless
                 kube_statefulset_status_update_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
diff --git a/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/silence.yaml b/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/silence.yaml
index c3e45f77..ccae3b65 100644
--- a/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/silence.yaml
+++ b/tests/golden/release-4.13/openshift4-monitoring/openshift4-monitoring/silence.yaml
@@ -63,7 +63,7 @@ spec:
                     configMapKeyRef:
                       key: silences.json
                       name: silence
-              image: quay.io/appuio/oc:v4.14
+              image: quay.io/appuio/oc:v4.15
               imagePullPolicy: IfNotPresent
               name: silence
               ports: []
diff --git a/tests/golden/release-4.14/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/release-4.14/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
index b8f2fdad..3ef75985 100644
--- a/tests/golden/release-4.14/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
+++ b/tests/golden/release-4.14/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
@@ -392,13 +392,17 @@ spec:
             syn_component: openshift4-monitoring
         - alert: SYN_HighOverallControlPlaneCPU
           annotations:
-            description: |-
-              On a multi-node cluster with three control plane nodes, the overall CPU utilization may only be about 2/3 of all available capacity. This is because if a single control plane node fails, the remaining two must handle the load of the cluster in order to be HA. If the cluster is using more than 2/3 of all capacity, if one control plane node fails, the remaining two are likely to fail when they take the load. To fix this, increase the CPU and memory on your control plane nodes.
-              On a single node OpenShift (SNO) cluster, this alert will also fire if the 2/3 of the CPU cores of the node are in use by any workload. This level of CPU utlization of an SNO cluster is probably not a problem under most circumstances, but high levels of utilization may result in degraded performance. To manage this alert or silence it in case of false positives see the following link:  https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html
+            description: Given three control plane nodes, the overall CPU utilization
+              may only be about 2/3 of all available capacity. This is because if
+              a single control plane node fails, the remaining two must handle the
+              load of the cluster in order to be HA. If the cluster is using more
+              than 2/3 of all capacity, if one control plane node fails, the remaining
+              two are likely to fail when they take the load. To fix this, increase
+              the CPU and memory on your control plane nodes.
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/ExtremelyHighIndividualControlPlaneCPU.md
-            summary: CPU utilization across all control plane nodes is more than 60%
-              of the total available CPU. Control plane node outage may cause a cascading
-              failure; increase available CPU.
+            summary: CPU utilization across all three control plane nodes is higher
+              than two control plane nodes can sustain; a single control plane node
+              outage may cause a cascading failure; increase available CPU.
             syn_component: openshift4-monitoring
           expr: |
             sum(
@@ -829,7 +833,7 @@ spec:
             syn_component: openshift4-monitoring
           expr: |
             (
-              max without (revision) (
+              max by(namespace, statefulset) (
                 kube_statefulset_status_current_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
                   unless
                 kube_statefulset_status_update_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
diff --git a/tests/golden/release-4.14/openshift4-monitoring/openshift4-monitoring/silence.yaml b/tests/golden/release-4.14/openshift4-monitoring/openshift4-monitoring/silence.yaml
index c3e45f77..ccae3b65 100644
--- a/tests/golden/release-4.14/openshift4-monitoring/openshift4-monitoring/silence.yaml
+++ b/tests/golden/release-4.14/openshift4-monitoring/openshift4-monitoring/silence.yaml
@@ -63,7 +63,7 @@ spec:
                     configMapKeyRef:
                       key: silences.json
                       name: silence
-              image: quay.io/appuio/oc:v4.14
+              image: quay.io/appuio/oc:v4.15
               imagePullPolicy: IfNotPresent
               name: silence
               ports: []
diff --git a/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
index b8f2fdad..94eda30e 100644
--- a/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
+++ b/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
@@ -221,6 +221,25 @@ spec:
             syn_component: openshift4-monitoring
     - name: syn-cluster-operators
       rules:
+        - alert: SYN_CannotEvaluateConditionalUpdates
+          annotations:
+            description: Failure to evaluate conditional update matches means that
+              Cluster Version Operator cannot decide whether an update path is recommended
+              or not.
+            summary: Cluster Version Operator cannot evaluate conditional update matches
+              for {{ $value | humanizeDuration }}.
+            syn_component: openshift4-monitoring
+          expr: |
+            max by (version, condition, status, reason)
+            (
+              (
+                time()-cluster_version_conditional_update_condition_seconds{condition="Recommended", status="Unknown"}
+              ) >= 3600
+            )
+          labels:
+            severity: warning
+            syn: 'true'
+            syn_component: openshift4-monitoring
         - alert: SYN_ClusterOperatorDegraded
           annotations:
             description: The {{ $labels.name }} operator is degraded because {{ $labels.reason
@@ -392,13 +411,17 @@ spec:
             syn_component: openshift4-monitoring
         - alert: SYN_HighOverallControlPlaneCPU
           annotations:
-            description: |-
-              On a multi-node cluster with three control plane nodes, the overall CPU utilization may only be about 2/3 of all available capacity. This is because if a single control plane node fails, the remaining two must handle the load of the cluster in order to be HA. If the cluster is using more than 2/3 of all capacity, if one control plane node fails, the remaining two are likely to fail when they take the load. To fix this, increase the CPU and memory on your control plane nodes.
-              On a single node OpenShift (SNO) cluster, this alert will also fire if the 2/3 of the CPU cores of the node are in use by any workload. This level of CPU utlization of an SNO cluster is probably not a problem under most circumstances, but high levels of utilization may result in degraded performance. To manage this alert or silence it in case of false positives see the following link:  https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html
+            description: Given three control plane nodes, the overall CPU utilization
+              may only be about 2/3 of all available capacity. This is because if
+              a single control plane node fails, the remaining two must handle the
+              load of the cluster in order to be HA. If the cluster is using more
+              than 2/3 of all capacity, if one control plane node fails, the remaining
+              two are likely to fail when they take the load. To fix this, increase
+              the CPU and memory on your control plane nodes.
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/ExtremelyHighIndividualControlPlaneCPU.md
-            summary: CPU utilization across all control plane nodes is more than 60%
-              of the total available CPU. Control plane node outage may cause a cascading
-              failure; increase available CPU.
+            summary: CPU utilization across all three control plane nodes is higher
+              than two control plane nodes can sustain; a single control plane node
+              outage may cause a cascading failure; increase available CPU.
             syn_component: openshift4-monitoring
           expr: |
             sum(
@@ -426,7 +449,7 @@ spec:
             summary: etcd cluster database is running full.
             syn_component: openshift4-monitoring
           expr: |
-            (last_over_time(etcd_mvcc_db_total_size_in_bytes[5m]) / last_over_time(etcd_server_quota_backend_bytes[5m]))*100 > 95
+            (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95
           for: 10m
           labels:
             severity: critical
@@ -829,7 +852,7 @@ spec:
             syn_component: openshift4-monitoring
           expr: |
             (
-              max without (revision) (
+              max by(namespace, statefulset) (
                 kube_statefulset_status_current_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
                   unless
                 kube_statefulset_status_update_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
@@ -1352,10 +1375,12 @@ spec:
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeClockNotSynchronising.md
             summary: Clock not synchronising.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
             and
             node_timex_maxerror_seconds{job="node-exporter"} >= 16
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: critical
@@ -1367,7 +1392,8 @@ spec:
               0.05s. Ensure NTP is configured correctly on this host.
             summary: Clock skew detected.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             (
               node_timex_offset_seconds{job="node-exporter"} > 0.05
             and
@@ -1379,6 +1405,7 @@ spec:
             and
               deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
             )
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: warning
diff --git a/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/silence.yaml b/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/silence.yaml
index c3e45f77..ccae3b65 100644
--- a/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/silence.yaml
+++ b/tests/golden/remote-write/openshift4-monitoring/openshift4-monitoring/silence.yaml
@@ -63,7 +63,7 @@ spec:
                     configMapKeyRef:
                       key: silences.json
                       name: silence
-              image: quay.io/appuio/oc:v4.14
+              image: quay.io/appuio/oc:v4.15
               imagePullPolicy: IfNotPresent
               name: silence
               ports: []
diff --git a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
index 726f5f95..3fca1e1d 100644
--- a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
+++ b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
@@ -233,6 +233,26 @@ spec:
             syn_team: clumsy-donkeys
     - name: syn-cluster-operators
       rules:
+        - alert: SYN_CannotEvaluateConditionalUpdates
+          annotations:
+            description: Failure to evaluate conditional update matches means that
+              Cluster Version Operator cannot decide whether an update path is recommended
+              or not.
+            summary: Cluster Version Operator cannot evaluate conditional update matches
+              for {{ $value | humanizeDuration }}.
+            syn_component: openshift4-monitoring
+          expr: |
+            max by (version, condition, status, reason)
+            (
+              (
+                time()-cluster_version_conditional_update_condition_seconds{condition="Recommended", status="Unknown"}
+              ) >= 3600
+            )
+          labels:
+            severity: warning
+            syn: 'true'
+            syn_component: openshift4-monitoring
+            syn_team: clumsy-donkeys
         - alert: SYN_ClusterOperatorDegraded
           annotations:
             description: The {{ $labels.name }} operator is degraded because {{ $labels.reason
@@ -413,13 +433,17 @@ spec:
             syn_team: clumsy-donkeys
         - alert: SYN_HighOverallControlPlaneCPU
           annotations:
-            description: |-
-              On a multi-node cluster with three control plane nodes, the overall CPU utilization may only be about 2/3 of all available capacity. This is because if a single control plane node fails, the remaining two must handle the load of the cluster in order to be HA. If the cluster is using more than 2/3 of all capacity, if one control plane node fails, the remaining two are likely to fail when they take the load. To fix this, increase the CPU and memory on your control plane nodes.
-              On a single node OpenShift (SNO) cluster, this alert will also fire if the 2/3 of the CPU cores of the node are in use by any workload. This level of CPU utlization of an SNO cluster is probably not a problem under most circumstances, but high levels of utilization may result in degraded performance. To manage this alert or silence it in case of false positives see the following link:  https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html
+            description: Given three control plane nodes, the overall CPU utilization
+              may only be about 2/3 of all available capacity. This is because if
+              a single control plane node fails, the remaining two must handle the
+              load of the cluster in order to be HA. If the cluster is using more
+              than 2/3 of all capacity, if one control plane node fails, the remaining
+              two are likely to fail when they take the load. To fix this, increase
+              the CPU and memory on your control plane nodes.
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/ExtremelyHighIndividualControlPlaneCPU.md
-            summary: CPU utilization across all control plane nodes is more than 60%
-              of the total available CPU. Control plane node outage may cause a cascading
-              failure; increase available CPU.
+            summary: CPU utilization across all three control plane nodes is higher
+              than two control plane nodes can sustain; a single control plane node
+              outage may cause a cascading failure; increase available CPU.
             syn_component: openshift4-monitoring
           expr: |
             sum(
@@ -448,7 +472,7 @@ spec:
             summary: etcd cluster database is running full.
             syn_component: openshift4-monitoring
           expr: |
-            (last_over_time(etcd_mvcc_db_total_size_in_bytes[5m]) / last_over_time(etcd_server_quota_backend_bytes[5m]))*100 > 95
+            (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95
           for: 10m
           labels:
             severity: critical
@@ -872,7 +896,7 @@ spec:
             syn_component: openshift4-monitoring
           expr: |
             (
-              max without (revision) (
+              max by(namespace, statefulset) (
                 kube_statefulset_status_current_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
                   unless
                 kube_statefulset_status_update_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
@@ -1425,10 +1449,12 @@ spec:
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeClockNotSynchronising.md
             summary: Clock not synchronising.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
             and
             node_timex_maxerror_seconds{job="node-exporter"} >= 16
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: critical
@@ -1441,7 +1467,8 @@ spec:
               0.05s. Ensure NTP is configured correctly on this host.
             summary: Clock skew detected.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             (
               node_timex_offset_seconds{job="node-exporter"} > 0.05
             and
@@ -1453,6 +1480,7 @@ spec:
             and
               deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
             )
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: warning
diff --git a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/silence.yaml b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/silence.yaml
index c3e45f77..ccae3b65 100644
--- a/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/silence.yaml
+++ b/tests/golden/team-routing/openshift4-monitoring/openshift4-monitoring/silence.yaml
@@ -63,7 +63,7 @@ spec:
                     configMapKeyRef:
                       key: silences.json
                       name: silence
-              image: quay.io/appuio/oc:v4.14
+              image: quay.io/appuio/oc:v4.15
               imagePullPolicy: IfNotPresent
               name: silence
               ports: []
diff --git a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
index d0800206..6a2c3adf 100644
--- a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
+++ b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
@@ -221,6 +221,25 @@ spec:
             syn_component: openshift4-monitoring
     - name: syn-cluster-operators
       rules:
+        - alert: SYN_CannotEvaluateConditionalUpdates
+          annotations:
+            description: Failure to evaluate conditional update matches means that
+              Cluster Version Operator cannot decide whether an update path is recommended
+              or not.
+            summary: Cluster Version Operator cannot evaluate conditional update matches
+              for {{ $value | humanizeDuration }}.
+            syn_component: openshift4-monitoring
+          expr: |
+            max by (version, condition, status, reason)
+            (
+              (
+                time()-cluster_version_conditional_update_condition_seconds{condition="Recommended", status="Unknown"}
+              ) >= 3600
+            )
+          labels:
+            severity: warning
+            syn: 'true'
+            syn_component: openshift4-monitoring
         - alert: SYN_ClusterOperatorDegraded
           annotations:
             description: The {{ $labels.name }} operator is degraded because {{ $labels.reason
@@ -392,13 +411,17 @@ spec:
             syn_component: openshift4-monitoring
         - alert: SYN_HighOverallControlPlaneCPU
           annotations:
-            description: |-
-              On a multi-node cluster with three control plane nodes, the overall CPU utilization may only be about 2/3 of all available capacity. This is because if a single control plane node fails, the remaining two must handle the load of the cluster in order to be HA. If the cluster is using more than 2/3 of all capacity, if one control plane node fails, the remaining two are likely to fail when they take the load. To fix this, increase the CPU and memory on your control plane nodes.
-              On a single node OpenShift (SNO) cluster, this alert will also fire if the 2/3 of the CPU cores of the node are in use by any workload. This level of CPU utlization of an SNO cluster is probably not a problem under most circumstances, but high levels of utilization may result in degraded performance. To manage this alert or silence it in case of false positives see the following link:  https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html
+            description: Given three control plane nodes, the overall CPU utilization
+              may only be about 2/3 of all available capacity. This is because if
+              a single control plane node fails, the remaining two must handle the
+              load of the cluster in order to be HA. If the cluster is using more
+              than 2/3 of all capacity, if one control plane node fails, the remaining
+              two are likely to fail when they take the load. To fix this, increase
+              the CPU and memory on your control plane nodes.
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/ExtremelyHighIndividualControlPlaneCPU.md
-            summary: CPU utilization across all control plane nodes is more than 60%
-              of the total available CPU. Control plane node outage may cause a cascading
-              failure; increase available CPU.
+            summary: CPU utilization across all three control plane nodes is higher
+              than two control plane nodes can sustain; a single control plane node
+              outage may cause a cascading failure; increase available CPU.
             syn_component: openshift4-monitoring
           expr: |
             sum(
@@ -426,7 +449,7 @@ spec:
             summary: etcd cluster database is running full.
             syn_component: openshift4-monitoring
           expr: |
-            (last_over_time(etcd_mvcc_db_total_size_in_bytes[5m]) / last_over_time(etcd_server_quota_backend_bytes[5m]))*100 > 95
+            (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95
           for: 10m
           labels:
             severity: critical
@@ -829,7 +852,7 @@ spec:
             syn_component: openshift4-monitoring
           expr: |
             (
-              max without (revision) (
+              max by(namespace, statefulset) (
                 kube_statefulset_status_current_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
                   unless
                 kube_statefulset_status_update_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
@@ -1352,10 +1375,12 @@ spec:
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeClockNotSynchronising.md
             summary: Clock not synchronising.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
             and
             node_timex_maxerror_seconds{job="node-exporter"} >= 16
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: critical
@@ -1367,7 +1392,8 @@ spec:
               0.05s. Ensure NTP is configured correctly on this host.
             summary: Clock skew detected.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             (
               node_timex_offset_seconds{job="node-exporter"} > 0.05
             and
@@ -1379,6 +1405,7 @@ spec:
             and
               deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
             )
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: warning
diff --git a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/silence.yaml b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/silence.yaml
index c3e45f77..ccae3b65 100644
--- a/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/silence.yaml
+++ b/tests/golden/user-workload-monitoring/openshift4-monitoring/openshift4-monitoring/silence.yaml
@@ -63,7 +63,7 @@ spec:
                     configMapKeyRef:
                       key: silences.json
                       name: silence
-              image: quay.io/appuio/oc:v4.14
+              image: quay.io/appuio/oc:v4.15
               imagePullPolicy: IfNotPresent
               name: silence
               ports: []
diff --git a/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml b/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
index b9e0f802..fd44674f 100644
--- a/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
+++ b/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/prometheus_rules.yaml
@@ -221,6 +221,25 @@ spec:
             syn_component: openshift4-monitoring
     - name: syn-cluster-operators
       rules:
+        - alert: SYN_CannotEvaluateConditionalUpdates
+          annotations:
+            description: Failure to evaluate conditional update matches means that
+              Cluster Version Operator cannot decide whether an update path is recommended
+              or not.
+            summary: Cluster Version Operator cannot evaluate conditional update matches
+              for {{ $value | humanizeDuration }}.
+            syn_component: openshift4-monitoring
+          expr: |
+            max by (version, condition, status, reason)
+            (
+              (
+                time()-cluster_version_conditional_update_condition_seconds{condition="Recommended", status="Unknown"}
+              ) >= 3600
+            )
+          labels:
+            severity: warning
+            syn: 'true'
+            syn_component: openshift4-monitoring
         - alert: SYN_ClusterOperatorDegraded
           annotations:
             description: The {{ $labels.name }} operator is degraded because {{ $labels.reason
@@ -392,13 +411,17 @@ spec:
             syn_component: openshift4-monitoring
         - alert: SYN_HighOverallControlPlaneCPU
           annotations:
-            description: |-
-              On a multi-node cluster with three control plane nodes, the overall CPU utilization may only be about 2/3 of all available capacity. This is because if a single control plane node fails, the remaining two must handle the load of the cluster in order to be HA. If the cluster is using more than 2/3 of all capacity, if one control plane node fails, the remaining two are likely to fail when they take the load. To fix this, increase the CPU and memory on your control plane nodes.
-              On a single node OpenShift (SNO) cluster, this alert will also fire if the 2/3 of the CPU cores of the node are in use by any workload. This level of CPU utlization of an SNO cluster is probably not a problem under most circumstances, but high levels of utilization may result in degraded performance. To manage this alert or silence it in case of false positives see the following link:  https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html
+            description: Given three control plane nodes, the overall CPU utilization
+              may only be about 2/3 of all available capacity. This is because if
+              a single control plane node fails, the remaining two must handle the
+              load of the cluster in order to be HA. If the cluster is using more
+              than 2/3 of all capacity, if one control plane node fails, the remaining
+              two are likely to fail when they take the load. To fix this, increase
+              the CPU and memory on your control plane nodes.
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/ExtremelyHighIndividualControlPlaneCPU.md
-            summary: CPU utilization across all control plane nodes is more than 60%
-              of the total available CPU. Control plane node outage may cause a cascading
-              failure; increase available CPU.
+            summary: CPU utilization across all three control plane nodes is higher
+              than two control plane nodes can sustain; a single control plane node
+              outage may cause a cascading failure; increase available CPU.
             syn_component: openshift4-monitoring
           expr: |
             sum(
@@ -426,7 +449,7 @@ spec:
             summary: etcd cluster database is running full.
             syn_component: openshift4-monitoring
           expr: |
-            (last_over_time(etcd_mvcc_db_total_size_in_bytes[5m]) / last_over_time(etcd_server_quota_backend_bytes[5m]))*100 > 95
+            (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95
           for: 10m
           labels:
             severity: critical
@@ -829,7 +852,7 @@ spec:
             syn_component: openshift4-monitoring
           expr: |
             (
-              max without (revision) (
+              max by(namespace, statefulset) (
                 kube_statefulset_status_current_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
                   unless
                 kube_statefulset_status_update_revision{namespace=~"(appuio.*|cilium|default|kube-.*|openshift-.*|syn.*)",job="kube-state-metrics"}
@@ -1352,10 +1375,12 @@ spec:
             runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeClockNotSynchronising.md
             summary: Clock not synchronising.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
             and
             node_timex_maxerror_seconds{job="node-exporter"} >= 16
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: critical
@@ -1367,7 +1392,8 @@ spec:
               0.05s. Ensure NTP is configured correctly on this host.
             summary: Clock skew detected.
             syn_component: openshift4-monitoring
-          expr: |
+          expr: |-
+            (
             (
               node_timex_offset_seconds{job="node-exporter"} > 0.05
             and
@@ -1379,6 +1405,7 @@ spec:
             and
               deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
             )
+            ) and on() absent(up{job="ptp-monitor-service"})
           for: 10m
           labels:
             severity: warning
diff --git a/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/silence.yaml b/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/silence.yaml
index c3e45f77..ccae3b65 100644
--- a/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/silence.yaml
+++ b/tests/golden/vsphere/openshift4-monitoring/openshift4-monitoring/silence.yaml
@@ -63,7 +63,7 @@ spec:
                     configMapKeyRef:
                       key: silences.json
                       name: silence
-              image: quay.io/appuio/oc:v4.14
+              image: quay.io/appuio/oc:v4.15
               imagePullPolicy: IfNotPresent
               name: silence
               ports: []