Skip to content

Commit

Permalink
omd update: generate crash report
Browse files Browse the repository at this point in the history
CMK-20526

Change-Id: I6b642f796fabb2545dbafaf9556b780098758561
  • Loading branch information
SoloJacobs committed Jan 14, 2025
1 parent ba3fdc4 commit 062550d
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 15 deletions.
22 changes: 22 additions & 0 deletions omd/packages/omd/omdlib/crash_reporting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env python3
# Copyright (C) 2024 Checkmk GmbH - License: GNU General Public License v2
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.


from cmk.ccc import version
from cmk.ccc.crash_reporting import ABCCrashReport, CrashReportStore, VersionInfo

from cmk.utils.paths import crash_dir, omd_root


class _OMDCrashReport(ABCCrashReport[VersionInfo]):
    """Crash report class whose type tag is ``"omd"``.

    Only the type tag is defined here; all other behaviour comes from
    ``ABCCrashReport``.
    """

    @classmethod
    def type(cls) -> str:
        """Return the type tag identifying this kind of crash report."""
        report_type = "omd"
        return report_type


def report_crash() -> str:
    """Create an ``omd`` crash report, save it and return its identifier text.

    The report is built from ``crash_dir`` and the general version infos of
    ``omd_root`` and is then handed to a ``CrashReportStore`` for saving.

    NOTE(review): ``from_exception`` presumably captures the exception that is
    currently being handled, so this is expected to be called from inside an
    ``except`` block -- confirm against ``ABCCrashReport``.

    Returns:
        The text produced by ``ident_to_text()`` of the saved crash report.
    """
    general_version_info = version.get_general_version_infos(omd_root)
    report = _OMDCrashReport.from_exception(crash_dir, general_version_info)
    report_store = CrashReportStore()
    report_store.save(report)
    return report.ident_to_text()
42 changes: 27 additions & 15 deletions omd/packages/omd/omdlib/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@
from typing import Literal, Self

from omdlib.contexts import SiteContext
from omdlib.crash_reporting import report_crash
from omdlib.tmpfs import prepare_and_populate_tmpfs, unmount_tmpfs_without_save
from omdlib.version_info import VersionInfo

from cmk.utils.paths import crash_dir


def store(site_dir: Path, relpath: Path | str, backup_dir: Path) -> None:
# `store` is only valid on files, symlinks and empty dirs.
Expand Down Expand Up @@ -173,11 +176,11 @@ def __enter__(self) -> Self:
self.backup_dir.mkdir()
except FileExistsError:
sys.exit(
"An unknown error occured before the update could be started. The folder "
f"{self.backup_dir} contains data from a failed update attempt. This data should "
"have been written back to the site directory and then have been deleted. "
"Check whether any files need to be restored from this directory. Then this folder "
"can be deleted and the update can be retried."
f"The folder {self.backup_dir} contains data from a failed update attempt. This "
"only happens, if a serious error occured during a previous update attempt. "
f"Please contact support. A crash report may be available in {crash_dir}. "
"Since the root cause of this error is not known to OMD, the site is an "
"unknown state and both, restarting or updating the site, can have unknown effects.\n"
)
backup_managed(self.site_dir, self.old_skel, self.new_skel, self.backup_dir)
store(self.site_dir, "version", self.backup_dir)
Expand All @@ -197,15 +200,24 @@ def __exit__(
exc_tb: TracebackType | None,
) -> Literal[False]:
if exc_type is not None:
if self.populated_tmpfs:
# Always leave the tmpfs unmounted. We currently are in the context of the new
# version (symlink has been restored, but python3 interpreter and dynamic libraries
# are pointing to the new context. Thus, we only umount here.
unmount_tmpfs_without_save(self.site_name, self.tmp_dir, False, False)
for relpath in HOOK_RELPATHS:
restore(self.site_dir, relpath, self.backup_dir)
_restore_version_meta_dir(self.site_dir, self.backup_dir)
restore(self.site_dir, "version", self.backup_dir)
restore_managed(self.site_dir, self.old_skel, self.new_skel, self.backup_dir)
try:
if self.populated_tmpfs:
# Always leave the tmpfs unmounted. We currently are in the context of the new
# version (symlink has been restored, but python3 interpreter and dynamic libraries
# are pointing to the new context). Thus, we only umount here.
unmount_tmpfs_without_save(self.site_name, self.tmp_dir, False, False)
for relpath in HOOK_RELPATHS:
restore(self.site_dir, relpath, self.backup_dir)
_restore_version_meta_dir(self.site_dir, self.backup_dir)
restore(self.site_dir, "version", self.backup_dir)
restore_managed(self.site_dir, self.old_skel, self.new_skel, self.backup_dir)
except Exception:
identity = report_crash()
sys.stderr.write(
f"A serious error occured, which resulted in a crash with id: {identity}\n"
"Please contact support with this crash id.\n"
"Since the root cause of this error is not known to OMD, the site is an "
"unknown state and both, restarting or updating the site, can have unknown effects.\n"
)
shutil.rmtree(self.backup_dir)
return False # Don't suppress the exception

0 comments on commit 062550d

Please sign in to comment.