Merge pull request #120 from appuio/dep/logging-5.8
Support OpenShift Logging 5.8
Showing 22 changed files with 819 additions and 237 deletions.
@@ -1,11 +1,14 @@
https://raw.githubusercontent.com/openshift/cluster-logging-operator/release-5.6/internal/metrics/alerts/fluentd.go.FluentdPrometheusAlert release-5.6/fluentd_prometheus_alerts.yaml
https://raw.githubusercontent.com/openshift/cluster-logging-operator/release-5.7/internal/metrics/alerts/fluentd.go.FluentdPrometheusAlert release-5.7/fluentd_prometheus_alerts.yaml
https://raw.githubusercontent.com/openshift/cluster-logging-operator/master/internal/metrics/alerts/fluentd.go.FluentdPrometheusAlert master/fluentd_prometheus_alerts.yaml
https://raw.githubusercontent.com/openshift/cluster-logging-operator/release-5.8/config/prometheus/collector_alerts.yaml release-5.8/collector_prometheus_alerts.yaml
https://raw.githubusercontent.com/openshift/cluster-logging-operator/master/config/prometheus/collector_alerts.yaml master/collector_prometheus_alerts.yaml

https://raw.githubusercontent.com/openshift/elasticsearch-operator/release-5.6/files/prometheus_alerts.yml release-5.6/elasticsearch_operator_prometheus_alerts.yaml
https://raw.githubusercontent.com/openshift/elasticsearch-operator/release-5.7/files/prometheus_alerts.yml release-5.7/elasticsearch_operator_prometheus_alerts.yaml
https://raw.githubusercontent.com/openshift/elasticsearch-operator/release-5.8/files/prometheus_alerts.yml release-5.8/elasticsearch_operator_prometheus_alerts.yaml
https://raw.githubusercontent.com/openshift/elasticsearch-operator/master/files/prometheus_alerts.yml master/elasticsearch_operator_prometheus_alerts.yaml

https://raw.githubusercontent.com/openshift/loki/release-5.6/operator/internal/manifests/internal/alerts/prometheus-alerts.yaml release-5.6/lokistack_prometheus_alerts.yaml
https://raw.githubusercontent.com/openshift/loki/release-5.7/operator/internal/manifests/internal/alerts/prometheus-alerts.yaml release-5.7/lokistack_prometheus_alerts.yaml
https://raw.githubusercontent.com/openshift/loki/release-5.8/operator/internal/manifests/internal/alerts/prometheus-alerts.yaml release-5.8/lokistack_prometheus_alerts.yaml
https://raw.githubusercontent.com/openshift/loki/main/operator/internal/manifests/internal/alerts/prometheus-alerts.yaml master/lokistack_prometheus_alerts.yaml
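Each non-blank line above pairs an upstream raw URL with the local path under component/extracted_alerts/ where the fetched copy is stored; blank lines separate the cluster-logging-operator, elasticsearch-operator, and loki sections. A minimal sketch of a fetch helper for this format follows; the `sources` file name and the script itself are assumptions for illustration, not tooling shipped by this commit:

```python
# Hypothetical helper: download each "URL TARGET" pair from a sources
# file into component/extracted_alerts/. A sketch, not the component's
# actual tooling.
import pathlib
import urllib.request

BASE = pathlib.Path("component/extracted_alerts")

def sync_alerts(sources_file: str = "sources") -> None:
    for line in pathlib.Path(sources_file).read_text().splitlines():
        line = line.strip()
        if not line:
            continue  # blank separator lines between operator sections
        url, target = line.split()
        dest = BASE / target
        dest.parent.mkdir(parents=True, exist_ok=True)
        with urllib.request.urlopen(url) as resp:
            dest.write_bytes(resp.read())
        print(f"fetched {url} -> {dest}")

if __name__ == "__main__":
    sync_alerts()
```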
component/extracted_alerts/master/collector_prometheus_alerts.yaml (71 additions, 0 deletions)

@@ -0,0 +1,71 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: collector
  namespace: openshift-logging
spec:
  groups:
  - name: logging_collector.alerts
    rules:
    - alert: CollectorNodeDown
      annotations:
        message: "Prometheus could not scrape {{ $labels.namespace }}/{{ $labels.pod }} collector component for more than 10m."
        summary: "Collector cannot be scraped"
      expr: |
        up{app_kubernetes_io_component = "collector", app_kubernetes_io_part_of = "cluster-logging"} == 0
      for: 10m
      labels:
        service: collector
        severity: critical
    - alert: CollectorHighErrorRate
      annotations:
        message: "{{ $value }}% of records have resulted in an error by {{ $labels.namespace }}/{{ $labels.pod }} collector component."
        summary: "{{ $labels.namespace }}/{{ $labels.pod }} collector component errors are high"
      expr: |
        100 * (
          collector:log_num_errors:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
          /
          collector:received_events:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
        ) > 0.001
      for: 15m
      labels:
        service: collector
        severity: critical
    - alert: CollectorVeryHighErrorRate
      annotations:
        message: "{{ $value }}% of records have resulted in an error by {{ $labels.namespace }}/{{ $labels.pod }} collector component."
        summary: "{{ $labels.namespace }}/{{ $labels.pod }} collector component errors are very high"
      expr: |
        100 * (
          collector:log_num_errors:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
          /
          collector:received_events:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
        ) > 0.05
      for: 15m
      labels:
        service: collector
        severity: critical
    - alert: FluentdQueueLengthIncreasing
      annotations:
        message: "For the last hour, fluentd {{ $labels.pod }} output '{{ $labels.plugin_id }}' average buffer queue length has increased continuously."
        summary: "Fluentd pod {{ $labels.pod }} is unable to keep up with traffic over time for forwarder output {{ $labels.plugin_id }}."
      expr: |
        sum by (pod,plugin_id) ( 0 * (deriv(fluentd_output_status_emit_records[1m] offset 1h))) + on(pod,plugin_id) ( deriv(fluentd_output_status_buffer_queue_length[10m]) > 0 and delta(fluentd_output_status_buffer_queue_length[1h]) > 1 )
      for: 1h
      labels:
        service: collector
        severity: Warning
  - name: logging_clusterlogging_telemetry.rules
    rules:
    - expr: |
        sum by(cluster)(log_collected_bytes_total)
      record: cluster:log_collected_bytes_total:sum
    - expr: |
        sum by(cluster)(log_logged_bytes_total)
      record: cluster:log_logged_bytes_total:sum
    - expr: |
        sum by(pod, namespace, app_kubernetes_io_part_of)(rate(vector_component_errors_total[2m])) or sum by(pod, namespace, app_kubernetes_io_part_of)(rate(fluentd_output_status_num_errors[2m]))
      record: collector:log_num_errors:sum_rate
    - expr: |
        sum by(pod, namespace, app_kubernetes_io_part_of)(rate(vector_component_received_events_total[2m])) or sum by(pod, namespace, app_kubernetes_io_part_of)(rate(fluentd_output_status_emit_records[2m]))
      record: collector:received_events:sum_rate
component/extracted_alerts/master/fluentd_prometheus_alerts.yaml (0 additions, 64 deletions)
This file was deleted.
component/extracted_alerts/release-5.8/collector_prometheus_alerts.yaml (71 additions, 0 deletions)

@@ -0,0 +1,71 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: collector
  namespace: openshift-logging
spec:
  groups:
  - name: logging_collector.alerts
    rules:
    - alert: CollectorNodeDown
      annotations:
        message: "Prometheus could not scrape {{ $labels.namespace }}/{{ $labels.pod }} collector component for more than 10m."
        summary: "Collector cannot be scraped"
      expr: |
        up{app_kubernetes_io_component = "collector", app_kubernetes_io_part_of = "cluster-logging"} == 0
      for: 10m
      labels:
        service: collector
        severity: critical
    - alert: CollectorHighErrorRate
      annotations:
        message: "{{ $value }}% of records have resulted in an error by {{ $labels.namespace }}/{{ $labels.pod }} collector component."
        summary: "{{ $labels.namespace }}/{{ $labels.pod }} collector component errors are high"
      expr: |
        100 * (
          collector:log_num_errors:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
          /
          collector:received_events:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
        ) > 0.001
      for: 15m
      labels:
        service: collector
        severity: critical
    - alert: CollectorVeryHighErrorRate
      annotations:
        message: "{{ $value }}% of records have resulted in an error by {{ $labels.namespace }}/{{ $labels.pod }} collector component."
        summary: "{{ $labels.namespace }}/{{ $labels.pod }} collector component errors are very high"
      expr: |
        100 * (
          collector:log_num_errors:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
          /
          collector:received_events:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
        ) > 0.05
      for: 15m
      labels:
        service: collector
        severity: critical
    - alert: FluentdQueueLengthIncreasing
      annotations:
        message: "For the last hour, fluentd {{ $labels.pod }} output '{{ $labels.plugin_id }}' average buffer queue length has increased continuously."
        summary: "Fluentd pod {{ $labels.pod }} is unable to keep up with traffic over time for forwarder output {{ $labels.plugin_id }}."
      expr: |
        sum by (pod,plugin_id) ( 0 * (deriv(fluentd_output_status_emit_records[1m] offset 1h))) + on(pod,plugin_id) ( deriv(fluentd_output_status_buffer_queue_length[10m]) > 0 and delta(fluentd_output_status_buffer_queue_length[1h]) > 1 )
      for: 1h
      labels:
        service: collector
        severity: Warning
  - name: logging_clusterlogging_telemetry.rules
    rules:
    - expr: |
        sum by(cluster)(log_collected_bytes_total)
      record: cluster:log_collected_bytes_total:sum
    - expr: |
        sum by(cluster)(log_logged_bytes_total)
      record: cluster:log_logged_bytes_total:sum
    - expr: |
        sum by(pod, namespace, app_kubernetes_io_part_of)(rate(vector_component_errors_total[2m])) or sum by(pod, namespace, app_kubernetes_io_part_of)(rate(fluentd_output_status_num_errors[2m]))
      record: collector:log_num_errors:sum_rate
    - expr: |
        sum by(pod, namespace, app_kubernetes_io_part_of)(rate(vector_component_received_events_total[2m])) or sum by(pod, namespace, app_kubernetes_io_part_of)(rate(fluentd_output_status_emit_records[2m]))
      record: collector:received_events:sum_rate