From ef9f4eb93199860e6228f6ed61b4839dac08897d Mon Sep 17 00:00:00 2001 From: Luka Racic Date: Tue, 8 Oct 2024 21:39:34 +0200 Subject: [PATCH] Revert "17036 FIX heartbeat_crm: Handle cases when pacemaker service is not running" This reverts commit e4751a4ec695d243ac090e67fe858ec7b12b30eb. Reason for revert: It causes a new service to appear on non-clustered setups Change-Id: I56e1522224dd153eedf495343c546894eb8aa7de --- .werks/17036.md | 17 ------- agents/check_mk_agent.linux | 12 ++--- .../plugins/agent_based/test_heartbeat_crm.py | 49 ------------------- 3 files changed, 3 insertions(+), 75 deletions(-) delete mode 100644 .werks/17036.md diff --git a/.werks/17036.md b/.werks/17036.md deleted file mode 100644 index 04662ceb6e9..00000000000 --- a/.werks/17036.md +++ /dev/null @@ -1,17 +0,0 @@ -[//]: # (werk v2) -# heartbeat_crm: Handle cases when pacemaker service is not running - -key | value ----------- | --- -date | 2024-09-16T21:35:09+00:00 -version | 2.3.0p18 -class | fix -edition | cre -component | checks -level | 1 -compatible | yes - -The creation of the agent section depended on the pacemaker service being running. If this was not the case, the section was left empty, causing the services to become stale. -From now on, if the pacemaker service is not running, the service will go into CRIT state and the summary will indicate that the connection was not possible/refused. - -Also, the agent now checks for the existence of 'crm_mon' on the system, as this is a prerequisite for further command execution. diff --git a/agents/check_mk_agent.linux b/agents/check_mk_agent.linux index 7499a909739..fa2b3a5a387 100755 --- a/agents/check_mk_agent.linux +++ b/agents/check_mk_agent.linux @@ -1040,15 +1040,9 @@ section_drbd() { } section_heartbeat() { - if command -v crm_mon >/dev/null 2>&1 || [ -S /var/run/heartbeat/crm/cib_ro ] || [ -S /var/run/crm/cib_ro ] || pgrep "^(crmd|pacemaker-contr)$" >/dev/null 2>&1; then - crm_output=$(TZ=UTC crm_mon -1 -r | grep -v ^$ | sed 's/^ //; /^\sResource Group:/,$ s/^\s//; s/^\s/_/g') - if [ -n "$crm_output" ]; then - echo '<<>>' - echo "$crm_output" - else - echo '<<>>' - crm_mon -1 -r 2>&1 - fi + if [ -S /var/run/heartbeat/crm/cib_ro ] || [ -S /var/run/crm/cib_ro ] || pgrep "^(crmd|pacemaker-contr)$" >/dev/null 2>&1; then + echo '<<>>' + TZ=UTC crm_mon -1 -r | grep -v ^$ | sed 's/^ //; /^\sResource Group:/,$ s/^\s//; s/^\s/_/g' fi if inpath cl_status; then diff --git a/tests/unit/cmk/base/plugins/agent_based/test_heartbeat_crm.py b/tests/unit/cmk/base/plugins/agent_based/test_heartbeat_crm.py index f8e80439096..948fea7879e 100644 --- a/tests/unit/cmk/base/plugins/agent_based/test_heartbeat_crm.py +++ b/tests/unit/cmk/base/plugins/agent_based/test_heartbeat_crm.py @@ -217,26 +217,6 @@ def _get_section_3() -> Section: return section -@pytest.fixture(name="section_no_cluster", scope="module") -def _get_section_no_cluster() -> Section: - section = parse_heartbeat_crm([["Error: cluster is not available on this node"]]) - assert section - return section - - -@pytest.fixture(name="section_connection_refused", scope="module") -def _get_section_connection_refused() -> Section: - section = parse_heartbeat_crm( - [ - [ - "error: Could not connect to launcher: Connection refused crm_mon: Connection to cluster failed: Connection refused" - ] - ] - ) - assert section - return section - - def test_discover_heartbeat_crm(section_1: Section) -> None: assert list(discover_heartbeat_crm({"naildown_dc": False}, section_1)) == [ Service(parameters={"num_nodes": 2, "num_resources": 3}), @@ -308,35 +288,6 @@ def test_check_heartbeat_crm_crit(section_2: Section) -> None: ] -def test_check_heartbeat_crm_no_cluster_crit(section_no_cluster: Section) -> None: - assert list( - _check_heartbeat_crm( - {"dc": "hasi", "max_age": 60, "num_nodes": 1, "num_resources": 4}, - section_no_cluster, - 1559939704.5458105, - ) - ) == [ - Result(state=State.CRIT, summary="Error: cluster is not available on this node"), - ] - - -def test_check_heartbeat_crm_failed_connection_crit( - section_connection_refused: Section, -) -> None: - assert list( - _check_heartbeat_crm( - {"dc": "hasi", "max_age": 60, "num_nodes": 1, "num_resources": 4}, - section_connection_refused, - 1559939704.5458105, - ) - ) == [ - Result( - state=State.CRIT, - summary="error: Could not connect to launcher: Connection refused crm_mon: Connection to cluster failed: Connection refused", - ), - ] - - def test_check_heartbeat_crm_resources_promotable_clone(section_3: Section) -> None: assert list( check_heartbeat_crm_resources(