From 13f1ff3d71981668c66d9533ee46f15b3f65f334 Mon Sep 17 00:00:00 2001 From: Stephan Feurer Date: Wed, 15 Jan 2025 17:11:54 +0100 Subject: [PATCH] Fix generating null resource A null resource was generated under the following condition: * Loki Stack not enabled * LogForwarding enabled --- component/log_forwarder.libsonnet | 5 +- .../openshift4_console_params.yaml | 3 - .../openshift4-logging/20_subscriptions.yaml | 39 --- .../openshift4-logging/30_loki_logstore.yaml | 14 - .../openshift4-logging/30_loki_netpol.yaml | 54 ---- .../openshift4-logging/30_loki_plugin.yaml | 13 - .../openshift4-logging/30_loki_rbac.yaml | 18 -- .../openshift4-logging/30_loki_stack.yaml | 61 ----- .../openshift4-logging/40_log_forwarder.yaml | 35 +-- .../40_log_forwarder_rbac.yaml | 18 -- .../50_fix_app_logs_reader.yaml | 17 -- .../50_fix_ingester_stuck.yaml | 153 ----------- .../50_fix_missing_metrics_token.yaml | 11 - .../60_lokistack_alerts.yaml | 245 ------------------ tests/multilineerr.yml | 19 +- 15 files changed, 33 insertions(+), 672 deletions(-) delete mode 100644 tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_logstore.yaml delete mode 100644 tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_netpol.yaml delete mode 100644 tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_plugin.yaml delete mode 100644 tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_rbac.yaml delete mode 100644 tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_stack.yaml delete mode 100644 tests/golden/multilineerr/openshift4-logging/openshift4-logging/50_fix_app_logs_reader.yaml delete mode 100644 tests/golden/multilineerr/openshift4-logging/openshift4-logging/50_fix_ingester_stuck.yaml delete mode 100644 tests/golden/multilineerr/openshift4-logging/openshift4-logging/50_fix_missing_metrics_token.yaml delete mode 100644 tests/golden/multilineerr/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml diff --git a/component/log_forwarder.libsonnet b/component/log_forwarder.libsonnet index f3cf0a4..f8d421d 100644 --- a/component/log_forwarder.libsonnet +++ b/component/log_forwarder.libsonnet @@ -161,7 +161,8 @@ local rbac = [ namespace: params.namespace, } ], }, - if lokiEnabled then kube._Object('rbac.authorization.k8s.io/v1', 'ClusterRoleBinding', 'logcollector-log-writer') { +] + if lokiEnabled then [ + kube._Object('rbac.authorization.k8s.io/v1', 'ClusterRoleBinding', 'logcollector-log-writer') { metadata+: { annotations+: { 'argocd.argoproj.io/sync-wave': '-50', @@ -179,7 +180,7 @@ local rbac = [ namespace: params.namespace, } ], }, -]; +] else []; // Define outputs below if forwarderEnabled then diff --git a/tests/golden/multilineerr/openshift4-logging/console-patching/openshift4_console_params.yaml b/tests/golden/multilineerr/openshift4-logging/console-patching/openshift4_console_params.yaml index f71555a..e69de29 100644 --- a/tests/golden/multilineerr/openshift4-logging/console-patching/openshift4_console_params.yaml +++ b/tests/golden/multilineerr/openshift4-logging/console-patching/openshift4_console_params.yaml @@ -1,3 +0,0 @@ -config: - plugins: - - logging-view-plugin diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/20_subscriptions.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/20_subscriptions.yaml index 158bafb..4ed7e21 100644 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/20_subscriptions.yaml +++ b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/20_subscriptions.yaml @@ -20,42 +20,3 @@ spec: name: cluster-logging source: redhat-operators sourceNamespace: openshift-operators-redhat ---- -apiVersion: operators.coreos.com/v1alpha1 -kind: Subscription -metadata: - annotations: - argocd.argoproj.io/sync-wave: '-80' - labels: - name: loki-operator - name: loki-operator - namespace: openshift-operators-redhat -spec: - channel: stable-6.1 - config: - resources: - limits: - memory: 512Mi - requests: - cpu: 50m - memory: 381Mi - installPlanApproval: Automatic - name: loki-operator - source: openshift-operators-redhat - sourceNamespace: openshift-operators-redhat ---- -apiVersion: operators.coreos.com/v1alpha1 -kind: Subscription -metadata: - annotations: - argocd.argoproj.io/sync-wave: '-80' - labels: - name: cluster-observability-operator - name: cluster-observability-operator - namespace: openshift-operators-redhat -spec: - channel: development - installPlanApproval: Automatic - name: cluster-observability-operator - source: openshift-operators-redhat - sourceNamespace: openshift-operators-redhat diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_logstore.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_logstore.yaml deleted file mode 100644 index 77d8c18..0000000 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_logstore.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: v1 -data: {} -kind: Secret -metadata: - annotations: {} - labels: - name: loki-logstore - name: loki-logstore -stringData: - access_key_id: '' - access_key_secret: '' - bucketnames: c-green-test-1234-logstore - endpoint: '' -type: Opaque diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_netpol.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_netpol.yaml deleted file mode 100644 index f2cd3bb..0000000 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_netpol.yaml +++ /dev/null @@ -1,54 +0,0 @@ -apiVersion: networking.k8s.io/v1 -kind: NetworkPolicy -metadata: - annotations: {} - labels: - name: allow-console-logging-view-plugin - name: allow-console-logging-view-plugin -spec: - ingress: - - from: - - podSelector: - matchLabels: - app: console - component: ui - - namespaceSelector: - matchLabels: - kubernetes.io/metadata.name: openshift-console - ports: - - port: 9443 - protocol: TCP - podSelector: - matchLabels: - app.kubernetes.io/created-by: openshift-logging_instance - app.kubernetes.io/name: logging-view-plugin - policyTypes: - - Ingress ---- -apiVersion: networking.k8s.io/v1 -kind: NetworkPolicy -metadata: - annotations: {} - labels: - name: allow-console-logging-lokistack-gateway - name: allow-console-logging-lokistack-gateway -spec: - ingress: - - from: - - podSelector: - matchLabels: - app: console - component: ui - - namespaceSelector: - matchLabels: - kubernetes.io/metadata.name: openshift-console - ports: - - port: 8080 - protocol: TCP - podSelector: - matchLabels: - app.kubernetes.io/component: lokistack-gateway - app.kubernetes.io/instance: loki - app.kubernetes.io/name: lokistack - policyTypes: - - Ingress diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_plugin.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_plugin.yaml deleted file mode 100644 index 3128c2f..0000000 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_plugin.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: observability.openshift.io/v1alpha1 -kind: UIPlugin -metadata: - labels: - name: logging - name: logging -spec: - logging: - logsLimit: 50 - lokiStack: - name: loki - timeout: 30s - type: Logging diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_rbac.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_rbac.yaml deleted file mode 100644 index d5dde59..0000000 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_rbac.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - annotations: {} - labels: - name: syn-loki-cluster-reader - rbac.authorization.k8s.io/aggregate-to-cluster-reader: 'true' - name: syn:loki:cluster-reader -rules: - - apiGroups: - - loki.grafana.com - resourceNames: - - logs - resources: - - application - - infrastructure - verbs: - - get diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_stack.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_stack.yaml deleted file mode 100644 index f859742..0000000 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/30_loki_stack.yaml +++ /dev/null @@ -1,61 +0,0 @@ -apiVersion: loki.grafana.com/v1 -kind: LokiStack -metadata: - annotations: - argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true - argocd.argoproj.io/sync-wave: '-50' - labels: - name: loki - name: loki -spec: - limits: - global: - ingestion: - ingestionBurstSize: 9 - ingestionRate: 5 - size: 1x.demo - storage: - schemas: - - effectiveDate: '2022-06-01' - version: v12 - - effectiveDate: '2024-09-01' - version: v13 - secret: - name: loki-logstore - type: s3 - storageClassName: '' - template: - compactor: - nodeSelector: - node-role.kubernetes.io/infra: '' - replicas: 1 - distributor: - nodeSelector: - node-role.kubernetes.io/infra: '' - replicas: 2 - gateway: - nodeSelector: - node-role.kubernetes.io/infra: '' - replicas: 2 - indexGateway: - nodeSelector: - node-role.kubernetes.io/infra: '' - replicas: 2 - ingester: - nodeSelector: - node-role.kubernetes.io/infra: '' - replicas: 2 - querier: - nodeSelector: - node-role.kubernetes.io/infra: '' - replicas: 2 - queryFrontend: - nodeSelector: - node-role.kubernetes.io/infra: '' - replicas: 2 - ruler: - nodeSelector: - node-role.kubernetes.io/infra: '' - replicas: 1 - tenants: - mode: openshift-logging diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/40_log_forwarder.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/40_log_forwarder.yaml index 2ff4093..89d9edb 100644 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/40_log_forwarder.yaml +++ b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/40_log_forwarder.yaml @@ -16,30 +16,21 @@ spec: tolerations: - key: storagenode operator: Exists + filters: + - name: detectexception + type: detectMultilineException + - name: labels + openshiftLabels: + foo: bar + type: openshiftLabels + - name: parse-json + type: parse managementState: Managed - outputs: - - lokiStack: - authentication: - token: - from: serviceAccount - target: - name: loki - namespace: openshift-logging - name: default-lokistack - tls: - ca: - configMapName: openshift-service-ca.crt - key: service-ca.crt - type: lokiStack pipelines: - - detectMultilineErrors: true + - filterRefs: + - detectexception + - labels + - parse-json name: application-logs - parse: json - - inputRefs: - - application - - infrastructure - name: default-lokistack - outputRefs: - - default-lokistack serviceAccount: name: logcollector diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/40_log_forwarder_rbac.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/40_log_forwarder_rbac.yaml index f8a6e37..cac68e1 100644 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/40_log_forwarder_rbac.yaml +++ b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/40_log_forwarder_rbac.yaml @@ -61,21 +61,3 @@ subjects: - kind: ServiceAccount name: logcollector namespace: openshift-logging ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - annotations: - argocd.argoproj.io/sync-wave: '-50' - labels: - name: logcollector-log-writer - name: logcollector-log-writer - namespace: openshift-logging -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: logging-collector-logs-writer -subjects: - - kind: ServiceAccount - name: logcollector - namespace: openshift-logging diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/50_fix_app_logs_reader.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/50_fix_app_logs_reader.yaml deleted file mode 100644 index 5e7989d..0000000 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/50_fix_app_logs_reader.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - annotations: {} - labels: - name: logging-application-logs-reader-aggregate - rbac.authorization.k8s.io/aggregate-to-admin: 'true' - name: logging-application-logs-reader-aggregate -rules: - - apiGroups: - - loki.grafana.com - resourceNames: - - logs - resources: - - application - verbs: - - get diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/50_fix_ingester_stuck.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/50_fix_ingester_stuck.yaml deleted file mode 100644 index dcca6fb..0000000 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/50_fix_ingester_stuck.yaml +++ /dev/null @@ -1,153 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - annotations: {} - labels: - name: loki-ingester-check - name: loki-ingester-check - namespace: openshift-logging ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - annotations: {} - labels: - name: loki-ingester-check - name: loki-ingester-check - namespace: openshift-logging -rules: - - apiGroups: - - '' - resources: - - pods - - pods/exec - verbs: - - get - - list - - watch - - create - - delete - - patch - - update ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - annotations: {} - labels: - name: loki-ingester-check - name: loki-ingester-check - namespace: openshift-logging -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: loki-ingester-check -subjects: - - kind: ServiceAccount - name: loki-ingester-check ---- -apiVersion: v1 -data: - wal-check.sh: | - #!/bin/bash - - set -e -o pipefail - - # Check if pod is in stuck state. - function check_pod() { - POD_NAME="loki-ingester-${1}" - echo "checking POD ${POD_NAME}" - PHASE=$(kubectl -n openshift-logging get po ${POD_NAME} -oyaml | yq '.status.phase') - if [ ${PHASE} != "Running" ]; then - return 0 - fi - READY=$(kubectl -n openshift-logging get po ${POD_NAME} -oyaml | yq '.status.conditions[] | select(.type == "ContainersReady") | .status') - if [ ${READY} == "True" ]; then - return 0 - fi - return 1 - } - - # Check directories of pod and remove non-existing checkpoint if present. - function check_dir() { - shopt -s extglob - POD_NAME="loki-ingester-${1}" - echo "checking DIR ${POD_NAME}" - DIR_CHP=$(kubectl -n openshift-logging exec -i ${POD_NAME} -- ls /tmp/wal | grep -o "^checkpoint\.[0-9]*$") - PATTERN=$(echo ${DIR_CHP} | sed 's/[^0-9]*//g') - DIR_WAL=$(kubectl -n openshift-logging exec -i ${POD_NAME} -- ls /tmp/wal | grep -o "^0*${PATTERN}$" || exit 0) - if [ -z $DIR_WAL ]; then - kubectl -n openshift-logging exec -i ${POD_NAME} -- rm -rf /tmp/wal/${DIR_CHP} - kubectl -n openshift-logging delete po ${POD_NAME} - fi - } - - # Check if pods are in stuck state for longer than ${SLEEP_TIME}. - # Only fix 1 pod at a time and immediatly exit if it is fixed. - function fix_pod() { - if ! check_pod $1; then - echo "stuck POD, waiting ${SLEEP_TIME}" - sleep ${SLEEP_TIME} - if ! check_pod $1; then - check_dir $1 - exit 0 - fi - fi - } - - fix_pod 0 - fix_pod 1 - - exit 0 -kind: ConfigMap -metadata: - annotations: {} - labels: - name: loki-ingester-check - name: loki-ingester-check - namespace: openshift-logging ---- -apiVersion: batch/v1 -kind: CronJob -metadata: - annotations: {} - labels: - name: loki-ingester-check - name: loki-ingester-check - namespace: openshift-logging -spec: - concurrencyPolicy: Forbid - failedJobsHistoryLimit: 0 - jobTemplate: - spec: - activeDeadlineSeconds: 360 - backoffLimit: 1 - template: - spec: - containers: - - command: - - /usr/local/bin/wal-check.sh - env: - - name: SLEEP_TIME - value: 2m - image: quay.io/appuio/oc:v4.14 - imagePullPolicy: IfNotPresent - name: check-pod - ports: [] - stdin: false - tty: false - volumeMounts: - - mountPath: /usr/local/bin/wal-check.sh - name: wal-check - readOnly: true - subPath: wal-check.sh - nodeSelector: - node-role.kubernetes.io/infra: '' - restartPolicy: Never - serviceAccountName: loki-ingester-check - volumes: - - configMap: - defaultMode: 364 - name: loki-ingester-check - name: wal-check - schedule: '*/10 * * * *' diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/50_fix_missing_metrics_token.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/50_fix_missing_metrics_token.yaml deleted file mode 100644 index 0b86fe6..0000000 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/50_fix_missing_metrics_token.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - annotations: - argocd.argoproj.io/sync-options: Prune=false,Delete=false - kubernetes.io/service-account.name: loki-operator-controller-manager-metrics-reader - labels: - name: loki-operator-controller-manager-metrics-token - name: loki-operator-controller-manager-metrics-token - namespace: openshift-operators-redhat -type: kubernetes.io/service-account-token diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml deleted file mode 100644 index 614581b..0000000 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml +++ /dev/null @@ -1,245 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - labels: - name: syn-loki-logging-rules - name: syn-loki-logging-rules - namespace: openshift-logging -spec: - groups: - - name: logging_loki.alerts - rules: - - alert: SYN_LokiRequestErrors - annotations: - message: '{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf - "%.2f" $value }}% errors.' - runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Request-Errors - summary: At least 10% of requests are responded by 5xx server errors. - expr: | - sum( - job_namespace_route_statuscode:loki_request_duration_seconds_count:irate1m{status_code=~"5.."} - ) by (job, namespace, route) - / - sum( - job_namespace_route_statuscode:loki_request_duration_seconds_count:irate1m - ) by (job, namespace, route) - * 100 - > 10 - for: 15m - labels: - severity: critical - syn: 'true' - syn_component: openshift4-logging - - alert: SYN_LokiStackWriteRequestErrors - annotations: - message: '{{ printf "%.2f" $value }}% of write requests from {{ $labels.job - }} in {{ $labels.namespace }} are returned with server errors.' - runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#LokiStack-Write-Request-Errors - summary: At least 10% of write requests to the lokistack-gateway are responded - with 5xx server errors. - expr: | - sum( - code_handler_job_namespace:lokistack_gateway_http_requests:irate1m{code=~"5..", handler="push"} - ) by (job, namespace) - / - sum( - code_handler_job_namespace:lokistack_gateway_http_requests:irate1m{handler="push"} - ) by (job, namespace) - * 100 - > 10 - for: 15m - labels: - severity: critical - syn: 'true' - syn_component: openshift4-logging - - alert: SYN_LokiStackReadRequestErrors - annotations: - message: '{{ printf "%.2f" $value }}% of query requests from {{ $labels.job - }} in {{ $labels.namespace }} are returned with server errors.' - runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#LokiStack-Read-Request-Errors - summary: At least 10% of query requests to the lokistack-gateway are responded - with 5xx server errors. - expr: | - sum( - code_handler_job_namespace:lokistack_gateway_http_requests:irate1m{code=~"5..", handler=~"query|query_range|label|labels|label_values"} - ) by (job, namespace) - / - sum( - code_handler_job_namespace:lokistack_gateway_http_requests:irate1m{handler=~"query|query_range|label|labels|label_values"} - ) by (job, namespace) - * 100 - > 10 - for: 15m - labels: - severity: critical - syn: 'true' - syn_component: openshift4-logging - - alert: SYN_LokiRequestPanics - annotations: - message: '{{ $labels.job }} is experiencing an increase of {{ $value }} - panics.' - runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Request-Panics - summary: A panic was triggered. - expr: | - sum( - increase( - loki_panic_total[10m] - ) - ) by (job, namespace) - > 0 - labels: - severity: critical - syn: 'true' - syn_component: openshift4-logging - - alert: SYN_LokiRequestLatency - annotations: - message: '{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf - "%.2f" $value }}s 99th percentile latency.' - runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Request-Latency - summary: The 99th percentile is experiencing high latency (higher than - 1 second). - expr: | - histogram_quantile(0.99, - sum( - irate( - loki_request_duration_seconds_bucket{route!~"(?i).*tail.*"}[1m] - ) - ) by (job, le, namespace, route) - ) - > 1 - for: 15m - labels: - severity: critical - syn: 'true' - syn_component: openshift4-logging - - alert: SYN_LokiTenantRateLimit - annotations: - message: '{{ $labels.job }} {{ $labels.route }} is experiencing 429 errors.' - runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Tenant-Rate-Limit - summary: At least 10% of requests are responded with the rate limit error - code. - expr: | - sum( - job_namespace_route_statuscode:loki_request_duration_seconds_count:irate1m{status_code="429"} - ) by (job, namespace, route) - / - sum( - job_namespace_route_statuscode:loki_request_duration_seconds_count:irate1m - ) by (job, namespace, route) - * 100 - > 10 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-logging - - alert: SYN_LokiStorageSlowWrite - annotations: - message: The storage path is experiencing slow write response rates. - runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Storage-Slow-Write - summary: The storage path is experiencing slow write response rates. - expr: | - histogram_quantile(0.99, - sum( - job_le_namespace_operation:loki_boltdb_shipper_request_duration_seconds_bucket:rate5m{operation="WRITE"} - ) by (job, le, namespace) - ) - > 1 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-logging - - alert: SYN_LokiStorageSlowRead - annotations: - message: The storage path is experiencing slow read response rates. - runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Storage-Slow-Read - summary: The storage path is experiencing slow read response rates. - expr: | - histogram_quantile(0.99, - sum( - job_le_namespace_operation:loki_boltdb_shipper_request_duration_seconds_bucket:rate5m{operation="Shipper.Query"} - ) by (job, le, namespace) - ) - > 5 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-logging - - alert: SYN_LokiWritePathHighLoad - annotations: - message: The write path is experiencing high load. - runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Write-Path-High-Load - summary: The write path is experiencing high load, causing backpressure - storage flushing. - expr: | - sum( - loki_ingester_wal_replay_flushing - ) by (job, namespace) - > 0 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-logging - - alert: SYN_LokiReadPathHighLoad - annotations: - message: The read path is experiencing high load. - runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Read-Path-High-Load - summary: The read path has high volume of queries, causing longer response - times. - expr: | - histogram_quantile(0.99, - sum( - rate( - loki_logql_querystats_latency_seconds_bucket[5m] - ) - ) by (job, le, namespace) - ) - > 30 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-logging - - alert: SYN_LokiDiscardedSamplesWarning - annotations: - message: |- - Loki in namespace {{ $labels.namespace }} is discarding samples in the "{{ $labels.tenant }}" tenant during ingestion. - Samples are discarded because of "{{ $labels.reason }}" at a rate of {{ .Value | humanize }} samples per second. - runbook_url: '[[ .RunbookURL]]#Loki-Discarded-Samples-Warning' - summary: Loki is discarding samples during ingestion because they fail - validation. - expr: | - sum by(namespace, tenant, reason) ( - irate(loki_discarded_samples_total{ - reason!="rate_limited", - reason!="per_stream_rate_limit", - reason!="stream_limit"}[2m]) - ) - > 0 - for: 15m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-logging - - alert: SYN_LokistackSchemaUpgradesRequired - annotations: - message: |- - The LokiStack "{{ $labels.stack_name }}" in namespace "{{ $labels.stack_namespace }}" is using a storage schema - configuration that does not contain the latest schema version. It is recommended to update the schema - configuration to update the schema version to the latest version in the future. - runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Lokistack-Schema-Upgrades-Required - summary: One or more of the deployed LokiStacks contains an outdated storage - schema configuration. - expr: | - sum ( - lokistack_status_condition{reason="StorageNeedsSchemaUpdate",status="true"} - ) by (stack_namespace, stack_name) - > 0 - for: 1m - labels: - severity: warning - syn: 'true' - syn_component: openshift4-logging diff --git a/tests/multilineerr.yml b/tests/multilineerr.yml index b91a325..dfb1781 100644 --- a/tests/multilineerr.yml +++ b/tests/multilineerr.yml @@ -23,8 +23,23 @@ parameters: defaultSourceNamespace: openshift-operators-redhat openshift4_logging: + components: + lokistack: + enabled: false + clusterLogForwarder: + filters: + detectexception: + type: detectMultilineException + parse-json: + type: parse + labels: + type: openshiftLabels + openshiftLabels: + foo: bar pipelines: application-logs: - parse: json - detectMultilineErrors: true + filterRefs: + - detectexception + - labels + - parse-json