From 418c120db01cb5030ac6d51e2b54b5277e44738b Mon Sep 17 00:00:00 2001 From: Sofia Colakovic Date: Thu, 8 Aug 2024 17:40:07 +0200 Subject: [PATCH] 16867 FIX azure: Remove unnecessary 'metric not found' errors When querying metrics, Azure agent was reporting each missing metric in the details of the 'Azure Agent Info' service. This didn't influence the status of the service but still implied an error. It was misleading because some metrics aren't defined in some types of resources. Now, those metrics are optional and won't cause an error. SUP-19726 Change-Id: If8f370610c32252f0f9499c4bfed4a60a4910203 --- .werks/16867.md | 19 +++++++++++++++++++ cmk/special_agents/agent_azure.py | 30 +++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 .werks/16867.md diff --git a/.werks/16867.md b/.werks/16867.md new file mode 100644 index 00000000000..45eb9ba6dc3 --- /dev/null +++ b/.werks/16867.md @@ -0,0 +1,19 @@ +[//]: # (werk v2) +# azure: Remove unnecessary 'metric not found' errors + +key | value +---------- | --- +compatible | yes +version | 2.3.0p13 +date | 2024-08-08T15:40:36+00:00 +level | 1 +class | fix +component | checks +edition | cre + +When querying metrics, Azure agent was reporting each missing metric in the +details of the 'Azure Agent Info' service. This didn't influence the status +of the service but still implied an error. + +It was misleading because some metrics aren't defined in some types +of resources. Now, those metrics are optional and won't cause an error. \ No newline at end of file diff --git a/cmk/special_agents/agent_azure.py b/cmk/special_agents/agent_azure.py index 7dd1c0f5801..8136b02873d 100644 --- a/cmk/special_agents/agent_azure.py +++ b/cmk/special_agents/agent_azure.py @@ -197,6 +197,22 @@ ], } +OPTIONAL_METRICS: Mapping[str, Sequence[str]] = { + "Microsoft.Sql/servers/databases": [ + "storage_percent", + "deadlock", + "dtu_consumption_percent", + ], + "Microsoft.DBforMySQL/servers": ["seconds_behind_master"], + "Microsoft.DBforMySQL/flexibleServers": ["replication_lag"], + "Microsoft.DBforPostgreSQL/servers": ["pg_replica_log_delay_in_seconds"], + "Microsoft.Network/loadBalancers": ["AllocatedSnatPorts", "UsedSnatPorts"], + "Microsoft.Compute/virtualMachines": [ + "CPU Credits Consumed", + "CPU Credits Remaining", + ], +} + class TagsImportPatternOption(enum.Enum): ignore_all = "IGNORE_ALL" @@ -1340,7 +1356,7 @@ def get_validity_from_args(self, *args: Any) -> bool: return True def get_live_data(self, *args: Any) -> Any: - mgmt_client, resource_id, err = args + mgmt_client, resource_id, resource_type, err = args metricnames, interval, aggregation, filter_ = self.metric_definition raw_metrics = mgmt_client.metrics( @@ -1358,7 +1374,11 @@ def get_live_data(self, *args: Any) -> Any: if parsed_metric is not None: metrics.append(parsed_metric) else: - msg = "metric not found: {} ({})".format(raw_metric["name"]["value"], aggregation) + metric_name = raw_metric["name"]["value"] + if metric_name in OPTIONAL_METRICS.get(resource_type, []): + continue + + msg = "metric not found: {} ({})".format(metric_name, aggregation) err.add("info", resource_id, msg) LOGGER.info(msg) @@ -1408,7 +1428,11 @@ def gather_metrics(mgmt_client, resource, debug=False): cache = MetricCache(resource, metric_def, NOW, debug=debug) try: resource.metrics += cache.get_data( - mgmt_client, resource.info["id"], err, use_cache=cache.cache_interval > 60 + mgmt_client, + resource.info["id"], + resource.info["type"], + err, + use_cache=cache.cache_interval > 60, ) except ApiError as exc: if debug: