diff --git a/.werks/17222 b/.werks/17222
new file mode 100644
index 00000000000..7aa9b8cff5a
--- /dev/null
+++ b/.werks/17222
@@ -0,0 +1,17 @@
+Title: HW/SW inventory: Retain inventory history on central site
+Class: fix
+Compatible: compat
+Component: omd
+Date: 1736779496
+Edition: cre
+Level: 1
+Version: 2.2.0p39
+
+This change affects customers who are monitoring hosts on a remote site.
+Previously, the inventory history of hosts would sporadically disappear on the central site, even though it was shown correctly on the remote site.
+This was due to an incorrect implementation of the diskspace utility.
+This implementation assumed that the historic data was only needed on the remote site.
+With this change, diskspace will only consider deleting the inventory history of hosts that have been deleted.
+
+The faulty behaviour was introduced in #Werk 13422.
+A similar but distinct issue is addressed in #Werk 17223.
diff --git a/omd/packages/check_mk/diskspace b/omd/packages/check_mk/diskspace
index bfece9f5218..0033a946d02 100644
--- a/omd/packages/check_mk/diskspace
+++ b/omd/packages/check_mk/diskspace
@@ -1,6 +1,5 @@
cleanup_paths = [
'var/mkeventd/history/*.log',
'var/mkeventd/messages/*.log',
- 'var/check_mk/inventory_archive/*/*',
'var/check_mk/core/archive/history-*',
]
diff --git a/omd/packages/maintenance/diskspace b/omd/packages/maintenance/diskspace
index c22a54fa19b..16b818e8082 100755
--- a/omd/packages/maintenance/diskspace
+++ b/omd/packages/maintenance/diskspace
@@ -15,7 +15,6 @@ from typing import Any, Literal
from cmk.utils.paths import omd_root, var_dir
from cmk.utils.render import fmt_bytes
-from cmk.utils.type_defs import HostName
opt_verbose = "-v" in sys.argv
opt_force = "-f" in sys.argv
@@ -215,57 +214,32 @@ def _oldest_candidate(file_infos: dict) -> str | None:
return None
-def _cleanup_host_directory_for_local_hosts(
- cleanup_hosts: set[HostName], base_path: str
-) -> list[HostName]:
+def _cleanup_host_directories(unaffected_hosts: set[str], base_path: str) -> set[str]:
"""
First find all directories not related to a known host.
"""
if not os.path.isdir(base_path):
- return []
+ return set()
- unrelated_dirs: list[str] = []
- for host_dir in os.listdir(base_path):
- if host_dir not in cleanup_hosts:
- unrelated_dirs.append(host_dir)
+ abandoned = {host_dir for host_dir in os.listdir(base_path) if host_dir not in unaffected_hosts}
- cleaned_up_hosts = _check_threshold_and_delete(unrelated_dirs, base_path)
+ return _check_threshold_and_delete(abandoned, base_path)
- return cleaned_up_hosts
-
-
-def _cleanup_host_directory_for_remote_hosts(cleaned_up_remote_hosts: set, base_path: str) -> list:
- """
- Find all directories existing on the local site and return a list of all
- matching hosts that are known on remote sites
- """
- if not os.path.isdir(base_path):
- return []
- unrelated_dirs = []
- for host_dir in os.listdir(base_path):
- if host_dir in cleaned_up_remote_hosts:
- unrelated_dirs.append(host_dir)
-
- cleaned_up_hosts = _check_threshold_and_delete(unrelated_dirs, base_path)
-
- return cleaned_up_hosts
-
-
-def _check_threshold_and_delete(unrelated_dirs: list[str], base_path: str) -> list:
+def _check_threshold_and_delete(abandoned_hosts: set[str], base_path: str) -> set[str]:
"""
Find the latest modified file for each directory. When the latest
modified file is older than the threshold, delete all files including
the host base directory.
"""
assert cleanup_abandoned_host_files is not None
- cleaned_up_hosts = []
- for unrelated_dir in unrelated_dirs:
+ cleaned_up_hosts = set()
+ for unrelated_dir in abandoned_hosts:
path = f"{base_path}/{unrelated_dir}"
mtime: float = _newest_modification_time_in_dir(path)
if mtime < time.time() - cleanup_abandoned_host_files:
_delete_files_and_base_directory(path, "abandoned host")
- cleaned_up_hosts.append(unrelated_dir)
+ cleaned_up_hosts.add(unrelated_dir)
else:
_verbose("Found abandoned host path (but not old enough): %s" % path)
@@ -296,16 +270,7 @@ def _newest_modification_time_in_dir(dir_path: str) -> float:
return mtime
-def _get_configured_hosts() -> tuple[set, set, bool]:
- """
- Get local known hosts for all kind of sites (central and remote).
- For central sites, get also all known hosts, even the ones that are
- assigned to remote sites.
- """
- local_site_hosts: set = set()
- all_hosts: set = set()
- is_wato_remote_site = True
-
+def _is_wato_remote_site() -> bool:
file_vars: dict = {}
if (
distr_wato_filepath := Path("~/etc/check_mk/conf.d/distributed_wato.mk").expanduser()
@@ -316,21 +281,63 @@ def _get_configured_hosts() -> tuple[set, set, bool]:
file_vars,
)
- if not file_vars.get("is_wato_slave_site", False):
- is_wato_remote_site = False
- all_hosts.update(
+ return file_vars.get("is_wato_slave_site", False)
+
+
+def _do_cleanup_central_site(retention_time: int, local_site_hosts: set[str]) -> None:
+ try:
+ all_hosts = set(
subprocess.check_output(
["check_mk", "--list-hosts", "--all-sites", "--include-offline"], encoding="utf-8"
).splitlines()
)
+ except subprocess.CalledProcessError as e:
+ _verbose("Failed to get site hosts (%s). Skipping abandoned host files cleanup" % e)
+ return
+
+ cleaned_up = (
+ _cleanup_host_directories(
+ all_hosts,
+ "%s/inventory_archive" % var_dir,
+ )
+ | _cleanup_host_directories(
+ local_site_hosts,
+ "%s/var/pnp4nagios/perfdata" % omd_root,
+ )
+ | _cleanup_host_directories(
+ local_site_hosts,
+ "%s/rrd" % var_dir,
+ )
+ )
+
+ # Now call Check_MK to clean up other files for the hosts which we have
+ # cleaned up abandoned files for.
+ if cleaned_up_deleted_hosts := cleaned_up - all_hosts:
+ _do_automation_call(cleaned_up_deleted_hosts, "delete-hosts")
+ if cleaned_up_remote_hosts := cleaned_up & (all_hosts - local_site_hosts):
+ _do_automation_call(cleaned_up_remote_hosts, "delete-hosts-known-remote")
+
- local_site_hosts.update(
- subprocess.check_output(
- ["check_mk", "--list-hosts", "--include-offline"], encoding="utf-8"
- ).splitlines()
+def _do_cleanup_remote_site(retention_time: int, local_site_hosts: set[str]) -> None:
+ cleaned_up_non_local_hosts = (
+ _cleanup_host_directories(
+ local_site_hosts,
+ "%s/inventory_archive" % var_dir,
+ )
+ | _cleanup_host_directories(
+ local_site_hosts,
+ "%s/var/pnp4nagios/perfdata" % omd_root,
+ )
+ | _cleanup_host_directories(
+ local_site_hosts,
+ "%s/rrd" % var_dir,
+ )
)
- return all_hosts, local_site_hosts, is_wato_remote_site
+ # Now call Check_MK to clean up other files for the hosts which we have
+ # cleaned up abandoned files for.
+ if cleaned_up_non_local_hosts:
+ _do_automation_call(cleaned_up_non_local_hosts, "delete-hosts")
def _do_cleanup_abandoned_host_files() -> None:
@@ -345,8 +352,14 @@ def _do_cleanup_abandoned_host_files() -> None:
if not cleanup_abandoned_host_files:
return
+ is_wato_remote_site = _is_wato_remote_site()
+
try:
- all_hosts, local_site_hosts, is_wato_remote_site = _get_configured_hosts()
+ local_site_hosts = set(
+ subprocess.check_output(
+ ["check_mk", "--list-hosts", "--include-offline"], encoding="utf-8"
+ ).splitlines()
+ )
except subprocess.CalledProcessError as e:
_verbose("Failed to get site hosts (%s). Skipping abandoned host files cleanup" % e)
return
@@ -355,44 +368,10 @@ def _do_cleanup_abandoned_host_files() -> None:
_verbose("Found no hosts. Be careful and not cleaning up anything.")
return
- cleanup_hosts = all_hosts if not is_wato_remote_site else local_site_hosts
-
- # Base directories where each host has a sub-directory below with
- # host related files inside
- path_patterns: list[str] = [
- "%s/inventory_archive" % var_dir,
- "%s/rrd" % var_dir,
- "%s/var/pnp4nagios/perfdata" % omd_root,
- ]
-
- cleaned_up_local_hosts: set = set()
- for base_path in path_patterns:
- cleaned_up_local_hosts.update(
- _cleanup_host_directory_for_local_hosts(
- cleanup_hosts,
- base_path,
- )
- )
-
- # Now call Check_MK to clean up other files for the hosts which we have
- # cleaned up abandoned files for.
- if cleaned_up_local_hosts:
- _do_automation_call(cleaned_up_local_hosts, "delete-hosts")
-
- # Now call Check_MK to clean up files for hosts that still have files local
- # but are only known on remote sites
- if all_hosts:
- remote_site_hosts = all_hosts - local_site_hosts
- cleaned_up_remote_hosts: set = set()
- for base_path in path_patterns:
- cleaned_up_remote_hosts.update(
- _cleanup_host_directory_for_remote_hosts(
- remote_site_hosts,
- base_path,
- )
- )
- if cleaned_up_remote_hosts:
- _do_automation_call(cleaned_up_remote_hosts, "delete-hosts-known-remote")
+ if is_wato_remote_site:
+ _do_cleanup_remote_site(cleanup_abandoned_host_files, local_site_hosts)
+ else:
+ _do_cleanup_central_site(cleanup_abandoned_host_files, local_site_hosts)
def _cleanup_aged() -> None: