From 16a79c8d04827982dca28c1486cf44faa994a7a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Murilo=20Menezes=20Mendon=C3=A7a?= Date: Tue, 2 Apr 2024 18:02:56 -0300 Subject: [PATCH] Feat: accept cron schedule for analyzers (#56) details: - include every N {field} on base regex on CronSchedule - fix order of SplitCron implementation - checks for "," and "-" cases only on the minute field - added tests --- tests/monitor/manager/test_monitor_setup.py | 74 ++++++++++++++++++- whylabs_toolkit/helpers/cron_validators.py | 49 ++++++++++++ .../monitor/manager/monitor_setup.py | 2 +- .../monitor/models/analyzer/analyzer.py | 16 +++- whylabs_toolkit/monitor/models/commons.py | 5 +- 5 files changed, 139 insertions(+), 7 deletions(-) create mode 100644 whylabs_toolkit/helpers/cron_validators.py diff --git a/tests/monitor/manager/test_monitor_setup.py b/tests/monitor/manager/test_monitor_setup.py index 9b043d1..1a92e57 100644 --- a/tests/monitor/manager/test_monitor_setup.py +++ b/tests/monitor/manager/test_monitor_setup.py @@ -273,4 +273,76 @@ def test_set_non_iso_data_readiness_raises(monitor_setup) -> None: with pytest.raises(ValueError): monitor_setup.data_readiness_duration = "Some non-conformant string" - \ No newline at end of file + + +def test_cron_schedule_for_analyzer(monitor_setup) -> None: + monitor_setup.config = FixedThresholdsConfig( + metric=DatasetMetric.classification_accuracy, + upper=0.75 + ) + monitor_setup.schedule = CronSchedule(cron="0 0 * * *") + monitor_setup.apply() + + assert monitor_setup.analyzer.schedule == CronSchedule( + cron="0 0 * * *" + ) + + monitor_setup.schedule = CronSchedule(cron="0 0 * * 1-5") + monitor_setup.apply() + + assert monitor_setup.analyzer.schedule == CronSchedule( + cron="0 0 * * 1-5" + ) + + monitor_setup.schedule = CronSchedule(cron="0 0 * * 6,0") + monitor_setup.apply() + + assert monitor_setup.analyzer.schedule == CronSchedule( + cron="0 0 * * 6,0" + ) + + monitor_setup.schedule = CronSchedule(cron="0 9-17 * * *") + monitor_setup.apply() + + assert monitor_setup.analyzer.schedule == CronSchedule( + cron="0 9-17 * * *" + ) + + monitor_setup.schedule = CronSchedule(cron="0 9,10,17 * * *") + monitor_setup.apply() + + assert monitor_setup.analyzer.schedule == CronSchedule( + cron="0 9,10,17 * * *" + ) + + monitor_setup.schedule = CronSchedule(cron="*/90 9,10,17 * * *") + monitor_setup.apply() + + assert monitor_setup.analyzer.schedule == CronSchedule( + cron="*/90 9,10,17 * * *" + ) + + monitor_setup.schedule = CronSchedule(cron="0 9,10,17 1,2,3 2,4,5 2,4") + monitor_setup.apply() + + assert monitor_setup.analyzer.schedule == CronSchedule( + cron="0 9,10,17 1,2,3 2,4,5 2,4" + ) + + # All below Must fail + + monitor_setup.schedule = CronSchedule(cron="* * * * *") # Every minute + with pytest.raises(ValueError): + monitor_setup.apply() + + monitor_setup.schedule = CronSchedule(cron="0 0 * * * *") # Too many fields + with pytest.raises(ValueError): + monitor_setup.apply() + + monitor_setup.schedule = CronSchedule(cron="1,2 0 * * *") # Less granular than 1h + with pytest.raises(ValueError): + monitor_setup.apply() + + monitor_setup.schedule = CronSchedule(cron="*/15 0 * * *") # every 15min + with pytest.raises(ValueError): + monitor_setup.apply() \ No newline at end of file diff --git a/whylabs_toolkit/helpers/cron_validators.py b/whylabs_toolkit/helpers/cron_validators.py new file mode 100644 index 0000000..b9e10b0 --- /dev/null +++ b/whylabs_toolkit/helpers/cron_validators.py @@ -0,0 +1,49 @@ +from dataclasses import dataclass + + +@dataclass +class SplitCron: + day_of_week: str + month: str + day_of_month: str + hour: str + minute: str + + +def split_cron_expression(cron: str) -> SplitCron: + """Split the cron expression into its components.""" + cron_slots = cron.split(" ") + if len(cron_slots) != 5: + raise ValueError("CronSchedule must have 5 fields.") + return SplitCron( + minute=cron_slots[0], + hour=cron_slots[1], + day_of_month=cron_slots[2], + month=cron_slots[3], + day_of_week=cron_slots[4], + ) + + +def _is_not_less_granular_than_1_hour(split_cron: SplitCron) -> bool: + """Check if the cron expression is less granular than 1 hour.""" + if split_cron.minute == "*": + return False + + for item in ["-", ","]: + if item in split_cron.minute: + return False + + if split_cron.minute.startswith("*/"): + try: + divisor = int(split_cron.minute.split("/")[1]) + if divisor < 60: + return False + except ValueError: + pass + + return True + + +def validate_cron_expression(cron: str) -> bool: + split_cron = split_cron_expression(cron) + return _is_not_less_granular_than_1_hour(split_cron=split_cron) diff --git a/whylabs_toolkit/monitor/manager/monitor_setup.py b/whylabs_toolkit/monitor/manager/monitor_setup.py index 3f714d6..b22eca6 100644 --- a/whylabs_toolkit/monitor/manager/monitor_setup.py +++ b/whylabs_toolkit/monitor/manager/monitor_setup.py @@ -31,7 +31,7 @@ def __init__(self, monitor_id: str, dataset_id: Optional[str] = None, config: Co self._monitor_mode: Optional[Union[EveryAnomalyMode, DigestMode]] = None self._monitor_actions: Optional[List[Union[GlobalAction, EmailRecipient, SlackWebhook, PagerDuty]]] = None - self._analyzer_schedule: Optional[FixedCadenceSchedule] = None + self._analyzer_schedule: Optional[Union[FixedCadenceSchedule, CronSchedule]] = None self._target_matrix: Optional[Union[ColumnMatrix, DatasetMatrix]] = None self._analyzer_config: Optional[ Union[ diff --git a/whylabs_toolkit/monitor/models/analyzer/analyzer.py b/whylabs_toolkit/monitor/models/analyzer/analyzer.py index 93d0973..f9eac4c 100644 --- a/whylabs_toolkit/monitor/models/analyzer/analyzer.py +++ b/whylabs_toolkit/monitor/models/analyzer/analyzer.py @@ -1,7 +1,7 @@ """Schema for analyses.""" from typing import Any, Dict, List, Optional, Union -from pydantic import BaseModel, Field, constr +from pydantic import BaseModel, Field, constr, validator from whylabs_toolkit.monitor.models.commons import NoExtrasBaseModel @@ -22,6 +22,7 @@ DisjunctionConfig, ) from .targets import ColumnMatrix, DatasetMatrix +from whylabs_toolkit.helpers.cron_validators import validate_cron_expression class Analyzer(NoExtrasBaseModel): @@ -57,8 +58,8 @@ class Analyzer(NoExtrasBaseModel): ] = Field( # noqa F722 None, description="A list of tags that are associated with the analyzer." ) - # disabling CronSchedule as it can be tricky on the BE - schedule: Optional[FixedCadenceSchedule] = Field( # Optional[Union[CronSchedule, FixedCadenceSchedule]] = Field( + + schedule: Optional[Union[FixedCadenceSchedule, CronSchedule]] = Field( None, description="A schedule for running the analyzer. If not set, the analyzer's considered disabled", ) @@ -100,6 +101,15 @@ class Analyzer(NoExtrasBaseModel): "monthly data.", ) + @validator("schedule", pre=True, always=True) + def validate_schedule( + cls, v: Optional[Union[FixedCadenceSchedule, CronSchedule]] + ) -> Optional[Union[FixedCadenceSchedule, CronSchedule]]: + """Validate the schedule.""" + if isinstance(v, CronSchedule) and not validate_cron_expression(v.cron): + raise ValueError("CronSchedule must be no less granular than 1 hour and must have 5 fields.") + return v + # NOT YET IMPLEMENTED: # ExperimentalConfig, # ColumnListChangeConfig, diff --git a/whylabs_toolkit/monitor/models/commons.py b/whylabs_toolkit/monitor/models/commons.py index 1225a89..58d5c36 100644 --- a/whylabs_toolkit/monitor/models/commons.py +++ b/whylabs_toolkit/monitor/models/commons.py @@ -6,7 +6,9 @@ from pydantic import BaseModel, Extra from pydantic.fields import Field -CRON_REGEX = "(@(annually|yearly|monthly|weekly|daily|hourly))|" "((((\\d+,)+\\d+|(\\d+(\\/|-)\\d+)|\\d+|\\*) ?){5,7})" +CRON_REGEX = ( + "(@(annually|yearly|monthly|weekly|daily|hourly))|" "((((\\d+,)+\\d+|(\\d+(\\/|-)\\d+)|\\d+|\\*|\\*/\\d+) ?){5,7})" +) DATASET_ID_REGEX = "[a-zA-Z0-9\\-_\\.]+" DATASET_ID_DEF = Field( @@ -50,7 +52,6 @@ class CronSchedule(NoExtrasBaseModel): exclusionRanges: Optional[List[TimeRange]] = Field( title="ExclusionRanges", description="The ranges of dates during which this Analyzer is NOT run." ) - # TODO: support other mode of configuring scheduling class Cadence(str, Enum):