Merge pull request #22811 from nrainer-materialize/scalability/regression-handling

scalability framework: regression handling
MaterializeInc · Oct 31, 2023 · d897573 · d897573
2 parents 2ed76c6 + f15c833
commit d897573
Show file tree

Hide file tree

Showing 3 changed files with 16 additions and 6 deletions.
diff --git a/misc/python/materialize/scalability/result_analyzers.py b/misc/python/materialize/scalability/result_analyzers.py
@@ -20,8 +20,8 @@
 
 
 class DefaultResultAnalyzer(ResultAnalyzer):
-    def __init__(self, max_deviation_in_percent: float):
-        self.max_deviation_in_percent = max_deviation_in_percent
+    def __init__(self, max_deviation_as_percent_decimal: float):
+        self.max_deviation_as_percent_decimal = max_deviation_as_percent_decimal
 
     def determine_regression_in_workload(
         self,
@@ -97,7 +97,7 @@ def _filter_entries_above_threshold(
         return tps_per_endpoint.loc[
             # keep entries x% worse than the baseline
             tps_per_endpoint[df_totals_ext_cols.TPS_DIFF_PERC] * (-1)
-            > self.max_deviation_in_percent
+            > self.max_deviation_as_percent_decimal
         ]
 
     def collect_regressions(

diff --git a/misc/python/materialize/scalability/workload_executor.py b/misc/python/materialize/scalability/workload_executor.py
@@ -28,7 +28,8 @@
 from materialize.scalability.workloads import *  # noqa: F401 F403
 from materialize.scalability.workloads_test import *  # noqa: F401 F403
 
-MAX_RETRIES_ON_REGRESSION = 1
+# number of retries in addition to the first run
+MAX_RETRIES_ON_REGRESSION = 2
 
 
 class WorkloadExecutor:
@@ -90,6 +91,7 @@ def run_and_evaluate_workload_for_endpoint(
         baseline_result: WorkloadResult | None,
         try_count: int,
     ) -> RegressionOutcome | None:
+        workload_name = workload_cls.__name__
         other_endpoint_result = self.run_workload_for_endpoint(
             other_endpoint, workload_cls()
         )
@@ -98,14 +100,18 @@ def run_and_evaluate_workload_for_endpoint(
             return None
 
         outcome = self.result_analyzer.determine_regression_in_workload(
-            workload_cls.__name__,
+            workload_name,
             self.baseline_endpoint,
             other_endpoint,
             baseline_result,
             other_endpoint_result,
         )
 
         if outcome.has_regressions() and try_count < MAX_RETRIES_ON_REGRESSION:
+            print(
+                f"Potential regression in workload {workload_name} at endpoint {other_endpoint},"
+                f" triggering retry {try_count + 1} of {MAX_RETRIES_ON_REGRESSION}"
+            )
             return self.run_and_evaluate_workload_for_endpoint(
                 workload_cls, other_endpoint, baseline_result, try_count=try_count + 1
             )

diff --git a/test/scalability/mzcompose.py b/test/scalability/mzcompose.py
@@ -42,6 +42,8 @@
     Postgres(),
 ]
 
+REGRESSION_THRESHOLD_AS_PERCENT_DECIMAL = 0.2
+
 
 def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
     parser.add_argument(
@@ -261,7 +263,9 @@ def report_regression_result(
 
 
 def create_result_analyzer(_args: argparse.Namespace) -> ResultAnalyzer:
-    return DefaultResultAnalyzer(max_deviation_in_percent=0.1)
+    return DefaultResultAnalyzer(
+        max_deviation_as_percent_decimal=REGRESSION_THRESHOLD_AS_PERCENT_DECIMAL
+    )
 
 
 def upload_regressions_to_buildkite(outcome: RegressionOutcome) -> None: