Skip to content

Commit

Permalink
15876 FIX sap_hana_db_status: Be ok for passive nodes
Browse files Browse the repository at this point in the history
Change-Id: Iaf3ba01dc71386081ba2f1d3b1ee843ca90784d4
  • Loading branch information
mo-ki committed Oct 25, 2023
1 parent 2e48eaf commit deabca7
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 42 deletions.
14 changes: 14 additions & 0 deletions .werks/15876
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Title: sap_hana_db_status: Be ok for passive nodes
Class: fix
Compatible: compat
Component: checks
Date: 1698058635
Edition: cre
Knowledge: undoc
Level: 1
State: unknown
Version: 2.1.0p36

The service <i>"SAP HANA Database Status"</i> went critical on a passive node.
It now remains ok and reports <i>"System is in passive mode"</i>.

44 changes: 37 additions & 7 deletions cmk/base/plugins/agent_based/sap_hana_db_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.

from typing import Dict
from typing import Mapping, Optional

from .agent_based_api.v1 import IgnoreResultsError, register, Result, Service, State
from .agent_based_api.v1.type_defs import CheckResult, DiscoveryResult, StringTable
from .utils import sap_hana

# Parsed "sap_hana_db_status" section: maps each SID/instance item to the first cell of
# its first agent output line (an empty string when the instance reported no lines).
SectionDBStatus = Mapping[str, str]

# Status strings that map to a non-critical monitoring state. Any status text that is
# not listed here is looked up with a State.CRIT default by the check function.
MAP_DB_STATUS = {"OK": State.OK, "WARNING": State.WARN}


def parse_sap_hana_db_status(string_table: StringTable) -> Dict[str, str]:
def parse_sap_hana_db_status(string_table: StringTable) -> SectionDBStatus:
return {
sid_instance: lines[0][0] if lines else ""
for sid_instance, lines in sap_hana.parse_sap_hana(string_table).items()
Expand All @@ -26,23 +28,51 @@ def parse_sap_hana_db_status(string_table: StringTable) -> Dict[str, str]:
)


def discovery_sap_hana_db_status(section: sap_hana.ParsedSection) -> DiscoveryResult:
for item in section:
def discover_sap_hana_db_status(
    section_sap_hana_db_status: Optional[SectionDBStatus],
    section_sap_hana_replication_status: Optional[sap_hana.ParsedSection],
) -> DiscoveryResult:
    """Yield one service per instance listed in the DB status section.

    Nothing is discovered when the DB status section is missing or empty. The
    replication status section is part of the signature but is not consulted here.
    """
    # A missing (None) or empty section collapses to an empty iterable.
    yield from (Service(item=instance) for instance in section_sap_hana_db_status or ())


def check_sap_hana_db_status(item: str, section: Dict[str, str]) -> CheckResult:
db_status = section.get(item)
def check_sap_hana_db_status(
    item: str,
    section_sap_hana_db_status: Optional[SectionDBStatus],
    section_sap_hana_replication_status: Optional[sap_hana.ParsedSection],
) -> CheckResult:
    """Report the database status of one SAP HANA instance.

    A status that would be critical is reported as OK with the summary
    "System is in passive mode" when the replication section marks the same item
    as passive. Without a replication section (or without a replication status for
    the item) the plain database status is reported.

    Raises:
        IgnoreResultsError: if the item has no (or an empty) status in the section.
    """
    if section_sap_hana_db_status is None:
        return

    status_text = section_sap_hana_db_status.get(item)
    if not status_text:
        raise IgnoreResultsError("Login into database failed.")

    # Every status text that is not explicitly mapped counts as critical.
    state = MAP_DB_STATUS.get(status_text, State.CRIT)

    if section_sap_hana_replication_status is None:
        raw_replication = None
    else:
        item_replication = section_sap_hana_replication_status.get(item, {})
        raw_replication = item_replication.get("sys_repl_status")

    # Only a would-be critical status is softened, and only if replication data exists.
    if state is State.CRIT and raw_replication is not None:
        _state, readable, _param_key = sap_hana.get_replication_state(raw_replication)
        if readable == "passive":
            yield Result(state=State.OK, summary="System is in passive mode")
            return

    yield Result(state=state, summary=status_text)


register.check_plugin(
name="sap_hana_db_status",
service_name="SAP HANA Database Status %s",
discovery_function=discovery_sap_hana_db_status,
sections=["sap_hana_db_status", "sap_hana_replication_status"],
discovery_function=discover_sap_hana_db_status,
check_function=check_sap_hana_db_status,
)
22 changes: 4 additions & 18 deletions cmk/base/plugins/agent_based/sap_hana_replication_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,13 @@
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.

from typing import Any, Final, Mapping
from collections.abc import Mapping
from typing import Any

from .agent_based_api.v1 import IgnoreResultsError, register, Result, Service, State
from .agent_based_api.v1 import IgnoreResultsError, register, Result, Service
from .agent_based_api.v1.type_defs import CheckResult, DiscoveryResult, StringTable
from .utils import sap_hana

# Maps the raw "sys_repl_status" code to a tuple of
# (default monitoring state, readable status text, check parameter key).
SAP_HANA_REPL_STATUS_MAP: Final = {
"0": (State.UNKNOWN, "unknown status from replication script", "state_unknown"),
"10": (State.CRIT, "no system replication", "state_no_replication"),
"11": (State.CRIT, "error", "state_error"),
# "12" actually stands for "unknown replication status", but as per customer's information
# (see SUP-1436), this should be indicated as "passive" replication aka secondary SAP HANA node.
"12": (State.OK, "passive", "state_replication_unknown"),
"13": (State.WARN, "initializing", "state_initializing"),
"14": (State.OK, "syncing", "state_syncing"),
"15": (State.OK, "active", "state_active"),
}


def parse_sap_hana_replication_status(string_table: StringTable) -> sap_hana.ParsedSection:
section: sap_hana.ParsedSection = {}
Expand Down Expand Up @@ -62,10 +51,7 @@ def check_sap_hana_replication_status(
if not data:
raise IgnoreResultsError("Login into database failed.")

sys_repl_status = data["sys_repl_status"]
state, state_readable, param_key = SAP_HANA_REPL_STATUS_MAP.get(
sys_repl_status, (State.UNKNOWN, "unknown[%s]" % sys_repl_status, "state_unknown")
)
state, state_readable, param_key = sap_hana.get_replication_state(data["sys_repl_status"])

yield Result(
state=params.get(param_key, state), summary="System replication: %s" % state_readable
Expand Down
21 changes: 21 additions & 0 deletions cmk/base/plugins/agent_based/utils/sap_hana.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,24 @@ def parse_sap_hana_cluster_aware(info):
elif instance is not None:
instance.append([e.strip('"') for e in line])
return parsed


# Known raw replication codes -> (monitoring state, readable text, check parameter key).
_REPLICATION_STATES = {
    "0": (State.UNKNOWN, "unknown status from replication script", "state_unknown"),
    "10": (State.CRIT, "no system replication", "state_no_replication"),
    "11": (State.CRIT, "error", "state_error"),
    # "12" actually stands for "unknown replication status", but as per customer's information
    # (see SUP-1436), this should be indicated as "passive" replication aka secondary SAP HANA node.
    "12": (State.OK, "passive", "state_replication_unknown"),
    "13": (State.WARN, "initializing", "state_initializing"),
    "14": (State.OK, "syncing", "state_syncing"),
    "15": (State.OK, "active", "state_active"),
}


def get_replication_state(raw: str) -> tuple[State, str, str]:
    """Translate a raw replication status code into state, readable text and parameter key.

    Codes that are not known yield State.UNKNOWN, a text of the form "unknown[<raw>]"
    and the parameter key "state_unknown".
    """
    known = _REPLICATION_STATES.get(raw)
    if known is not None:
        return known
    return State.UNKNOWN, "unknown[%s]" % raw, "state_unknown"
56 changes: 39 additions & 17 deletions tests/unit/cmk/base/plugins/agent_based/test_sap_hana_db_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
# conditions defined in the file COPYING, which is part of this source code package.

import pytest
from typing import Mapping, Sequence

from cmk.utils.type_defs import CheckPluginName, SectionName
import pytest

from cmk.base.plugins.agent_based import sap_hana_db_status as shds
from cmk.base.plugins.agent_based.agent_based_api.v1 import (
IgnoreResultsError,
Result,
Service,
State,
)
from cmk.base.plugins.agent_based.agent_based_api.v1.type_defs import CheckResult, StringTable


@pytest.mark.parametrize(
Expand All @@ -28,9 +30,8 @@
)
],
)
def test_parse_sap_hana_db_status(fix_register, info, expected_result):
section_plugin = fix_register.agent_sections[SectionName("sap_hana_db_status")]
assert section_plugin.parse_function(info) == expected_result
def test_parse_sap_hana_db_status(info: StringTable, expected_result: Mapping[str, str]) -> None:
assert shds.parse_sap_hana_db_status(info) == expected_result


@pytest.mark.parametrize(
Expand All @@ -45,10 +46,13 @@ def test_parse_sap_hana_db_status(fix_register, info, expected_result):
),
],
)
def test_inventory_sap_hana_db_status(fix_register, info, expected_result):
section = fix_register.agent_sections[SectionName("sap_hana_db_status")].parse_function(info)
plugin = fix_register.check_plugins[CheckPluginName("sap_hana_db_status")]
assert list(plugin.discovery_function(section)) == expected_result
def test_inventory_sap_hana_db_status(
info: StringTable, expected_result: Sequence[Service]
) -> None:
assert (
list(shds.discover_sap_hana_db_status(shds.parse_sap_hana_db_status(info), None))
== expected_result
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -83,10 +87,11 @@ def test_inventory_sap_hana_db_status(fix_register, info, expected_result):
),
],
)
def test_check_sap_hana_db_status(fix_register, item, info, expected_result):
section = fix_register.agent_sections[SectionName("sap_hana_db_status")].parse_function(info)
plugin = fix_register.check_plugins[CheckPluginName("sap_hana_db_status")]
assert list(plugin.check_function(item, section)) == expected_result
def test_check_sap_hana_db_status(
item: str, info: StringTable, expected_result: CheckResult
) -> None:
section = shds.parse_sap_hana_db_status(info)
assert list(shds.check_sap_hana_db_status(item, section, None)) == expected_result


@pytest.mark.parametrize(
Expand All @@ -100,8 +105,25 @@ def test_check_sap_hana_db_status(fix_register, item, info, expected_result):
),
],
)
def test_check_sap_hana_db_status_stale(fix_register, item, info):
section = fix_register.agent_sections[SectionName("sap_hana_db_status")].parse_function(info)
plugin = fix_register.check_plugins[CheckPluginName("sap_hana_db_status")]
def test_check_sap_hana_db_status_stale(item: str, info: StringTable) -> None:
section = shds.parse_sap_hana_db_status(info)
with pytest.raises(IgnoreResultsError):
list(plugin.check_function(item, section))
list(shds.check_sap_hana_db_status(item, section, None))


def test_check_sap_hana_ddb_status_passive_ok() -> None:
    """A failing DB status on a node with passive replication must be reported as OK."""
    section = shds.parse_sap_hana_db_status(
        [
            ["[[HXE 98]]"],
            [
                "Status: error, Details: hdbsql ERROR: * -10709: Connection failed (RTE:[89006] System call 'connect' failed, rc=113:No route to host"
            ],
        ]
    )
    results = list(
        shds.check_sap_hana_db_status("HXE 98", section, {"HXE 98": {"sys_repl_status": "12"}})
    )
    # Compare against the exact expected result: an `all(...)` over the results would be
    # vacuously true if the check yielded nothing, so it could not detect a missing result.
    assert results == [Result(state=State.OK, summary="System is in passive mode")]

0 comments on commit deabca7

Please sign in to comment.