-
Notifications
You must be signed in to change notification settings - Fork 465
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #22440 from nrainer-materialize/scalability/determine-regression
- Loading branch information
Showing
10 changed files
with
365 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# Copyright Materialize, Inc. and contributors. All rights reserved. | ||
# | ||
# Use of this software is governed by the Business Source License | ||
# included in the LICENSE file at the root of this repository. | ||
# | ||
# As of the Change Date specified in that file, in accordance with | ||
# the Business Source License, use of this software will be governed | ||
# by the Apache License, Version 2.0. | ||
from materialize.scalability.endpoint import Endpoint | ||
|
||
|
||
class Regression:
    """One measurement in which an endpoint was slower than the baseline.

    ``tps`` values are throughput figures; ``tps_diff`` must be negative.
    ``tps_diff_percent`` is stored as given and multiplied by 100 only when
    the regression is rendered as text.
    """

    def __init__(
        self,
        workload_name: str,
        concurrency: int,
        count: int,
        tps: float,
        tps_baseline: float,
        tps_diff: float,
        tps_diff_percent: float,
        endpoint: Endpoint,
    ):
        # A non-negative difference means the endpoint did not get slower.
        assert tps_diff < 0, "Not a regression!"

        # Where the regression was observed.
        self.workload_name = workload_name
        self.endpoint = endpoint
        self.concurrency = concurrency
        self.count = count

        # Measured throughput and how it compares to the baseline.
        self.tps = tps
        self.tps_baseline = tps_baseline
        self.tps_diff = tps_diff
        self.tps_diff_percent = tps_diff_percent

    def __str__(self) -> str:
        """Return a one-line, human-readable summary of the regression."""
        location = (
            f"workload '{self.workload_name}' at concurrency {self.concurrency}"
            f" with {self.endpoint}"
        )
        measured = f"{round(self.tps, 2)} tps vs. {round(self.tps_baseline, 2)} tps"
        delta = (
            f"({round(self.tps_diff, 2)} tps;"
            f" {round(100 * self.tps_diff_percent, 2)}%)"
        )
        return f"Regression in {location}: {measured} {delta}"
|
||
|
||
class RegressionOutcome:
    """Accumulates the regressions found during an analysis run."""

    def __init__(self):
        # Starts empty; regressions are added by appending to this list.
        self.regressions: list[Regression] = []

    def has_regressions(self) -> bool:
        """Return True when at least one regression has been recorded."""
        return bool(self.regressions)

    def __str__(self) -> str:
        """Render one bullet line per regression, or a fixed note if none."""
        if self.has_regressions():
            bullet_lines = [f"* {entry}" for entry in self.regressions]
            return "\n".join(bullet_lines)

        return "No regressions"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Copyright Materialize, Inc. and contributors. All rights reserved. | ||
# | ||
# Use of this software is governed by the Business Source License | ||
# included in the LICENSE file at the root of this repository. | ||
# | ||
# As of the Change Date specified in that file, in accordance with | ||
# the Business Source License, use of this software will be governed | ||
# by the Apache License, Version 2.0. | ||
|
||
|
||
from __future__ import annotations | ||
|
||
from materialize.scalability.endpoint import Endpoint | ||
from materialize.scalability.regression import RegressionOutcome | ||
from materialize.scalability.workload_result import WorkloadResult | ||
|
||
|
||
class ResultAnalyzer:
    """Base class for strategies that look for regressions in workload results.

    Subclasses are expected to override
    :meth:`determine_regressions_in_workload`; the implementation here only
    raises ``NotImplementedError``.
    """

    def determine_regression(
        self,
        baseline_endpoint: Endpoint,
        results_by_workload_name: dict[str, dict[Endpoint, WorkloadResult]],
    ) -> RegressionOutcome:
        """Analyze every workload and return the collected regressions.

        Args:
            baseline_endpoint: endpoint the other endpoints are compared to.
            results_by_workload_name: per-workload mapping of endpoint to its
                result.

        Returns:
            A fresh ``RegressionOutcome`` that was passed to
            ``determine_regressions_in_workload`` once per workload.
        """
        regression_outcome = RegressionOutcome()
        for workload_name, results_by_endpoint in results_by_workload_name.items():
            self.determine_regressions_in_workload(
                regression_outcome,
                baseline_endpoint,
                workload_name,
                results_by_endpoint,
            )

        return regression_outcome

    def determine_regressions_in_workload(
        self,
        regression_outcome: RegressionOutcome,
        baseline_endpoint: Endpoint,
        workload_name: str,
        results_by_endpoint: dict[Endpoint, WorkloadResult],
    ) -> None:
        """Record the regressions of one workload in ``regression_outcome``.

        Implementations report findings by mutating ``regression_outcome``;
        nothing is returned.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError
120 changes: 120 additions & 0 deletions
120
misc/python/materialize/scalability/result_analyzers.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
# Copyright Materialize, Inc. and contributors. All rights reserved. | ||
# | ||
# Use of this software is governed by the Business Source License | ||
# included in the LICENSE file at the root of this repository. | ||
# | ||
# As of the Change Date specified in that file, in accordance with | ||
# the Business Source License, use of this software will be governed | ||
# by the Apache License, Version 2.0. | ||
|
||
|
||
import pandas as pd | ||
|
||
from materialize.scalability.endpoint import Endpoint | ||
from materialize.scalability.regression import Regression, RegressionOutcome | ||
from materialize.scalability.result_analyzer import ( | ||
ResultAnalyzer, | ||
) | ||
from materialize.scalability.workload_result import WorkloadResult | ||
|
||
# Column names used when comparing the totals data frames of two endpoints.
COL_CONCURRENCY = "concurrency"
COL_COUNT = "count"
COL_TPS = "tps"
# Columns computed during the comparison.
COL_TPS_DIFF = "tps_diff"
COL_TPS_DIFF_PERC = "tps_diff_perc"
# Both merged frames contain a "tps" column, so pandas' default merge suffixes
# apply: "_x" for the left (baseline) frame and "_y" for the right (other) one.
COL_TPS_BASELINE = "tps_x"
COL_TPS_OTHER = "tps_y"
|
||
|
||
class DefaultResultAnalyzer(ResultAnalyzer):
    """Flags measurements whose tps dropped too far below the baseline endpoint.

    tps = transactions per second (higher is better).
    """

    def __init__(self, max_deviation_in_percent: float):
        # NOTE(review): despite the name, this value is compared directly with
        # tps_diff / tps_baseline, which is a fraction and not a percentage.
        # Confirm that callers pass e.g. 0.2 rather than 20.
        self.max_deviation_in_percent = max_deviation_in_percent

    def determine_regressions_in_workload(
        self,
        regression_outcome: RegressionOutcome,
        baseline_endpoint: Endpoint,
        workload_name: str,
        results_by_endpoint: dict[Endpoint, WorkloadResult],
    ) -> None:
        """Compare every non-baseline endpoint of a workload with the baseline.

        Regressions are appended to ``regression_outcome``.

        Raises:
            RuntimeError: if fewer than two endpoints have results, or if the
                baseline endpoint has no result.
        """
        if len(results_by_endpoint) <= 1:
            raise RuntimeError("Cannot compute regressions with a single target")

        if baseline_endpoint not in results_by_endpoint:
            raise RuntimeError("Regression baseline endpoint not in results!")

        baseline_result = results_by_endpoint[baseline_endpoint]

        # Walk the dict itself instead of a set difference of its keys: sets
        # have no defined iteration order, whereas dicts keep insertion order,
        # so regressions are reported in a stable order.
        for other_endpoint, other_result in results_by_endpoint.items():
            if other_endpoint == baseline_endpoint:
                continue

            self.determine_regression_in_workload(
                regression_outcome,
                workload_name,
                baseline_endpoint,
                other_endpoint,
                baseline_result,
                other_result,
            )

    def determine_regression_in_workload(
        self,
        regression_outcome: RegressionOutcome,
        workload_name: str,
        baseline_endpoint: Endpoint,
        other_endpoint: Endpoint,
        regression_baseline_result: WorkloadResult,
        other_result: WorkloadResult,
    ) -> None:
        """Compare one endpoint with the baseline and record its regressions.

        The totals of both results are joined on (count, concurrency); rows
        whose relative tps drop exceeds ``max_deviation_in_percent`` are handed
        to ``collect_regressions``.
        """
        # tps = transactions per seconds (higher is better)

        columns_to_keep = [COL_COUNT, COL_CONCURRENCY, COL_TPS]
        # Default merge is an inner join, so only (count, concurrency) pairs
        # present in both results are compared. The clashing tps columns get
        # the default "_x" (baseline) and "_y" (other) suffixes.
        tps_per_endpoint = regression_baseline_result.df_totals[columns_to_keep].merge(
            other_result.df_totals[columns_to_keep], on=[COL_COUNT, COL_CONCURRENCY]
        )

        # Negative diff: the other endpoint is slower than the baseline.
        tps_per_endpoint[COL_TPS_DIFF] = (
            tps_per_endpoint[COL_TPS_OTHER] - tps_per_endpoint[COL_TPS_BASELINE]
        )
        tps_per_endpoint[COL_TPS_DIFF_PERC] = (
            tps_per_endpoint[COL_TPS_DIFF] / tps_per_endpoint[COL_TPS_BASELINE]
        )

        # keep entries x% worse than the baseline
        exceeds_threshold = (
            tps_per_endpoint[COL_TPS_DIFF_PERC] * (-1) > self.max_deviation_in_percent
        )
        entries_exceeding_threshold = tps_per_endpoint.loc[exceeds_threshold]

        self.collect_regressions(
            regression_outcome,
            workload_name,
            baseline_endpoint,
            other_endpoint,
            entries_exceeding_threshold,
        )

    def collect_regressions(
        self,
        regression_outcome: RegressionOutcome,
        workload_name: str,
        baseline_endpoint: Endpoint,
        other_endpoint: Endpoint,
        entries_exceeding_threshold: pd.DataFrame,
    ) -> None:
        """Append one ``Regression`` per row of ``entries_exceeding_threshold``.

        ``baseline_endpoint`` is accepted but not used here.
        """
        for _, row in entries_exceeding_threshold.iterrows():
            regression = Regression(
                workload_name,
                concurrency=int(row[COL_CONCURRENCY]),
                count=int(row[COL_COUNT]),
                tps=row[COL_TPS_OTHER],
                tps_baseline=row[COL_TPS_BASELINE],
                tps_diff=row[COL_TPS_DIFF],
                tps_diff_percent=row[COL_TPS_DIFF_PERC],
                endpoint=other_endpoint,
            )
            regression_outcome.regressions.append(regression)
|
||
|
||
def row_count(data_frame: pd.DataFrame) -> int:
    """Return the number of rows in ``data_frame``."""
    rows, _columns = data_frame.shape
    return rows
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Copyright Materialize, Inc. and contributors. All rights reserved. | ||
# | ||
# Use of this software is governed by the Business Source License | ||
# included in the LICENSE file at the root of this repository. | ||
# | ||
# As of the Change Date specified in that file, in accordance with | ||
# the Business Source License, use of this software will be governed | ||
# by the Apache License, Version 2.0. | ||
|
||
import pandas as pd | ||
|
||
from materialize.scalability.workload import Workload | ||
|
||
|
||
class WorkloadResult:
    """Bundles a workload with the data frames holding its results."""

    def __init__(
        self,
        workload: Workload,
        df_totals: pd.DataFrame,
        df_details: pd.DataFrame,
    ):
        # The arguments are stored as given; nothing is copied or validated.
        self.df_details = df_details
        self.df_totals = df_totals
        self.workload = workload
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.