diff --git a/scripts/standby_node_health_check/README.md b/scripts/standby_node_health_check/README.md index 61ee6470..f905f72c 100644 --- a/scripts/standby_node_health_check/README.md +++ b/scripts/standby_node_health_check/README.md @@ -8,10 +8,10 @@ This script is meant to help a UFM HA user, to make sure that his standby server 2. No prequesition are needed 3. The script is meant to run on a standby node only. 4. Place the script in a directory, for example under `/tmp` -5. Run the command `python3 standby_node_health_check --fabric-interfaces ib0 ib1 --mgmt-interfaces ens192` +5. Run the command `python3 standby_node_health_check --fabric-interfaces ib0 ib1 --mgmt-interface ens192` ## What the script is checking -1. checking if all given fabric interface are up. +1. Checking if all given fabric interfaces are up. 2. Checking if all given management interface are up. 3. Checking ufm ha is configured. 5. Checking if the node is a standby. diff --git a/scripts/standby_node_health_check/standby_node_health_check.py b/scripts/standby_node_health_check/standby_node_health_check.py index ea43744a..220112cd 100644 --- a/scripts/standby_node_health_check/standby_node_health_check.py +++ b/scripts/standby_node_health_check/standby_node_health_check.py @@ -21,7 +21,7 @@ def configure_logger(): - logger_name = "standby_node_checker" + logger_name = "standby_node_health_checker" logger = logging.getLogger(logger_name) if not logger.hasHandlers(): @@ -88,6 +88,8 @@ class StandbyNodeHealthChecker: PCS_STATUS_COMMAND = "pcs status" COROSYNC_RINGS_COMMAND = "corosync-cfgtool -s" + # This regex is used to translate the output of ibdev2netdev, in case + # the user inputs ibx, we use it to find the matching mlx interface IBDEV2NETDEV_REGEX = re.compile(r"^([\w\d_]+) port \d ==> ([\w\d]+)") OLD_CORSYNC_RING_ID_REGEX = re.compile(r"^RING ID (\d+)") OLD_CORSYNC_RING_IP_REGEX = re.compile(r"id\ *= ([\d\.]+)") @@ -126,11 +128,11 @@ def _run_command(cls, command: str): def 
_check_ib_interfaces(self): result = True ib_interfaces_status = self._run_and_parse_ibstat() - ib_to_ml_map = self._get_ib_to_mlx_port_mapping() + ib_to_mlx_map = self._get_ib_to_mlx_port_mapping() for ib_interface in self._fabric_interfaces: ib_interface_to_validate = ib_interface if not ib_interface.startswith("mlx"): - ib_interface_to_validate = ib_to_ml_map.get(ib_interface, ib_interface) + ib_interface_to_validate = ib_to_mlx_map.get(ib_interface, ib_interface) if ib_interface_to_validate not in ib_interfaces_status: logger.warning("%s is not in the list of IB interfaces", ib_interface) result = False @@ -482,6 +484,7 @@ def check_drdb_role(self): def print_summary_information(self): logger.info("") logger.info("Executive summary:") + # If there is anything in the summary actions it means we have failures if len(self._summary_actions) > 0: for row in self._summary_actions: logger.info(row)