From f3ac6447b532af6e593db1be555efde28b6451de Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 19 Nov 2023 16:49:20 +0100 Subject: [PATCH 1/7] add benchmark comparison script --- benchmark/tools/compare.py | 135 +++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100755 benchmark/tools/compare.py diff --git a/benchmark/tools/compare.py b/benchmark/tools/compare.py new file mode 100755 index 00000000000..7619a4ce59f --- /dev/null +++ b/benchmark/tools/compare.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +import sys +import json +import argparse + +parser = argparse.ArgumentParser(description="Compare to Ginkgo benchmark outputs") +parser.add_argument("--outlier-threshold") +parser.add_argument("--output") +parser.add_argument("baseline") +parser.add_argument("comparison") +args = parser.parse_args() +keys = {"stencil", "size", "filename", "n", "r", "k", "m"} + + +def key_to_str(key: tuple) -> str: + """Restore a JSON output from a key tuple""" + result = {} + for key_name, key_val in zip(keys, key): + if key_val is not None: + result[key_name] = key_val + return json.dumps(result) + + +def parse_json_matrix(filename: str) -> dict: + """Parse a JSON file into a key -> test_case dict""" + parsed = json.load(open(filename)) + result = {} + assert isinstance(parsed, list) + for case in parsed: + assert isinstance(case, dict) + assert not keys.isdisjoint(case.keys()) + dict_key = tuple(case.get(key, None) for key in keys) + if dict_key in result.keys(): + print( + "WARNING: Duplicate key {}".format(key_to_str(dict_key)), + file=sys.stderr, + ) + result[dict_key] = case + return result + + +def warn_on_inconsistent_keys(baseline: dict, comparison: dict, context: str): + """Print a warning message for non-matching keys between baseline/comparison using the given context string""" + baseline_only = set(baseline.keys()).difference(comparison.keys()) + comparison_only = set(comparison.keys()).difference(baseline.keys()) + for dict_key 
in baseline_only: + print( + "WARNING: Key {} found in baseline only in context {}".format( + key_to_str(dict_key), context + ), + file=sys.stderr, + ) + for dict_key in comparison_only: + print( + "WARNING: Key {} found in comparison only in context {}".format( + key_to_str(dict_key), context + ), + file=sys.stderr, + ) + + +def ratio(baseline: int | float, comparison: int | float) -> float: + """Compares the ratio between baseline and comparison. For runtimes, this is the speedup.""" + return baseline / comparison + + +def compare_benchmark(baseline: dict, comparison: dict, context: str): + """Compares a handful of keys and component breakdowns recursively, writing them with a suffix to the output""" + comparison_keys = {"time", "storage", "iterations"} + suffix = ".ratio" + warn_on_inconsistent_keys(baseline, comparison, context) + result = {} + for key in baseline.keys(): + sub_context = "{}.{}".format(context, key) + if key == "components": + assert isinstance(baseline[key], dict) + assert isinstance(comparison[key], dict) + warn_on_inconsistent_keys(baseline[key], comparison[key], sub_context) + result[key + suffix] = { + sub_key: ratio(baseline[key][sub_key], comparison[key][sub_key]) + for sub_key in baseline[key] + } + elif isinstance(baseline[key], dict): + result[key] = compare_benchmark(baseline[key], comparison[key], sub_context) + elif key in comparison_keys: + result[key + suffix] = ratio(baseline[key], comparison[key]) + return result + + +def compare(baseline: dict, comparison: dict, context: str) -> dict: + """Compares a test case, keeping root-level values and recursing into benchmarks""" + warn_on_inconsistent_keys(baseline, comparison, context) + result = {} + for key in baseline.keys(): + # we don't have lists on the test case root level + assert not isinstance(baseline[key], list) + if isinstance(baseline[key], dict): + benchmark_result = {} + warn_on_inconsistent_keys( + baseline[key], comparison[key], "{}.{}".format(context, key) + ) + for 
benchmark_name in baseline[key].keys(): + if isinstance(baseline[key][benchmark_name], dict): + comparison_result = compare_benchmark( + baseline[key][benchmark_name], + comparison[key][benchmark_name], + "{}.{}.{}".format(context, key, benchmark_name), + ) + if len(comparison_result) > 0: + benchmark_result[benchmark_name] = comparison_result + if len(benchmark_result) > 0: + result[key] = benchmark_result + else: + # everything that's not a dict should only depend on the key in the root level + if baseline[key] != comparison[key]: + print( + "WARNING: Inconsistent value for {}: {} != {} in context {}".format( + key, baseline[key], comparison[key], context + ), + file=sys.stderr, + ) + result[key] = baseline[key] + return result + + +baseline_json = parse_json_matrix(args.baseline) +comparison_json = parse_json_matrix(args.comparison) +warn_on_inconsistent_keys(baseline_json, comparison_json, "root") + +results = [] + +for key in baseline_json.keys(): + results.append(compare(baseline_json[key], comparison_json[key], key_to_str(key))) + +json.dump(results, sys.stdout, indent=4) From 8178aa1ee1747223bc0d244177818945e1454add Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 19 Nov 2023 16:53:20 +0100 Subject: [PATCH 2/7] pretty-print final JSON output --- benchmark/utils/general.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 241d2225938..e0045d8f417 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -289,7 +289,7 @@ void backup_results(json& results) return; } std::ofstream ofs(filenames[next]); - ofs << results; + ofs << std::setw(4) << results; next = 1 - next; } From 7bf1de8b1371cd63e89463b217cd859f67b639d5 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 20 Nov 2023 06:03:29 +0100 Subject: [PATCH 3/7] enable outlier detection and table output --- benchmark/tools/compare.py | 144 ++++++++++++++++++++++++++++++++++--- 1 file changed, 135 
insertions(+), 9 deletions(-) diff --git a/benchmark/tools/compare.py b/benchmark/tools/compare.py index 7619a4ce59f..938d7aa65a6 100755 --- a/benchmark/tools/compare.py +++ b/benchmark/tools/compare.py @@ -2,23 +2,51 @@ import sys import json import argparse +import math +import pandas as pd parser = argparse.ArgumentParser(description="Compare to Ginkgo benchmark outputs") -parser.add_argument("--outlier-threshold") -parser.add_argument("--output") +parser.add_argument( + "--outliers", action="store_true", help="List outliers from the results" +) +parser.add_argument( + "--outlier-threshold", + type=float, + default=10, + help="At what percentage of deviation (above or below) should outliers be reported", +) +parser.add_argument( + "--outlier-count", + type=int, + default=1000, + help="How many outliers should be reported per benchmark", +) +parser.add_argument("--output", choices=["json", "csv", "markdown"], default="json") parser.add_argument("baseline") parser.add_argument("comparison") args = parser.parse_args() keys = {"stencil", "size", "filename", "n", "r", "k", "m"} +comparison_keys = {"time", "storage", "iterations"} +suffix = ".ratio" -def key_to_str(key: tuple) -> str: +def key_to_json(key: tuple) -> dict: """Restore a JSON output from a key tuple""" result = {} for key_name, key_val in zip(keys, key): if key_val is not None: result[key_name] = key_val - return json.dumps(result) + return result + + +def key_to_str(key: tuple) -> str: + """Restore a JSON output string from a key tuple""" + mapped = key_to_json(key) + if "filename" in mapped.keys(): + return mapped["filename"] + if "stencil" in mapped.keys(): + return "stencil({}, {})".format(mapped["stencil"], mapped["size"]) + return json.dumps(mapped).replace('"', "") def parse_json_matrix(filename: str) -> dict: @@ -66,8 +94,6 @@ def ratio(baseline: int | float, comparison: int | float) -> float: def compare_benchmark(baseline: dict, comparison: dict, context: str): """Compares a handful of keys and 
component breakdowns recursively, writing them with a suffix to the output""" - comparison_keys = {"time", "storage", "iterations"} - suffix = ".ratio" warn_on_inconsistent_keys(baseline, comparison, context) result = {} for key in baseline.keys(): @@ -123,13 +149,113 @@ def compare(baseline: dict, comparison: dict, context: str) -> dict: return result +def extract_benchmark_results( + input: dict, benchmarks: dict, case_key: tuple, context: str | None +): + for key, value in input.items(): + benchmark_name = key if context is None else "{}.{}".format(context, key) + if key in map(lambda x: x + suffix, comparison_keys): + benchmark_name = benchmark_name[: -len(suffix)] + if benchmark_name not in benchmarks.keys(): + benchmarks[benchmark_name] = [] + benchmarks[benchmark_name].append((case_key, value)) + elif isinstance(value, dict): + extract_benchmark_results(value, benchmarks, case_key, benchmark_name) + + +def is_outlier(value: float): + return math.fabs(math.log(value)) > math.log(1.0 + args.outlier_threshold / 100) + + baseline_json = parse_json_matrix(args.baseline) comparison_json = parse_json_matrix(args.comparison) warn_on_inconsistent_keys(baseline_json, comparison_json, "root") -results = [] +results = {} for key in baseline_json.keys(): - results.append(compare(baseline_json[key], comparison_json[key], key_to_str(key))) + results[key] = compare(baseline_json[key], comparison_json[key], key_to_str(key)) + +outliers = {} +if args.outliers: + benchmarks = {} + for key, value in results.items(): + extract_benchmark_results(value, benchmarks, key, None) + for benchmark_name, benchmark_results in benchmarks.items(): + outlier = sorted( + [ + (case_key, value) + for case_key, value in benchmark_results + if is_outlier(value) + ], + key=lambda x: math.fabs(math.log(x[1])), + reverse=True, + ) + outliers[benchmark_name] = outlier[: min(len(outlier), args.outlier_count)] -json.dump(results, sys.stdout, indent=4) +if args.output == "json": + print( + json.dumps( + 
{ + "results": [value for _, value in results.items()], + "outliers": { + key: [ + {"value": ratio_value} + key_to_json(case_key) + for (case_key, ratio_value) in value + ] + for key, value in outliers.items() + if len(value) > 0 + }, + }, + indent=4, + ) + ) +else: + columns = ["benchmark", "testcase", "ratio"] + only_first = args.output == "markdown" + table = pd.DataFrame( + sum( + [ + [ + ( + key if i == 0 or not only_first else "", + key_to_str(value[0]), + value[1], + ) + for i, value in enumerate(values) + ] + for key, values in benchmarks.items() + ], + [], + ), + columns=columns, + ) + if args.output == "csv": + table.to_csv(sys.stdout, index=False) + else: + table.to_markdown(sys.stdout, index=False) + if args.outliers: + outlier_table = pd.DataFrame( + sum( + [ + [ + ( + key if i == 0 or not only_first else "", + key_to_str(value[0]), + value[1], + ) + for i, value in enumerate(values) + ] + for key, values in outliers.items() + ], + [], + ), + columns=columns, + ) + if len(outlier_table) > 0: + print("\n\nOutliers") + if args.output == "csv": + outlier_table.to_csv(sys.stdout, index=False) + else: + outlier_table.to_markdown(sys.stdout, index=False) + print() From e15ab8610b691e0cef74bbbe37d7137f1c9a1a24 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 20 Nov 2023 23:24:47 +0100 Subject: [PATCH 4/7] review updates - simplify things - add type annotations - fix outlier issues - test everything Co-authored-by: Gregor Olenik Co-authored-by: Marcel Koch --- benchmark/tools/compare.py | 271 +++++++++++------------ benchmark/tools/compare_test.py | 226 +++++++++++++++++++ benchmark/tools/compare_test_input1.json | 48 ++++ benchmark/tools/compare_test_input2.json | 48 ++++ benchmark/tools/compare_test_input3.json | 39 ++++ 5 files changed, 492 insertions(+), 140 deletions(-) create mode 100644 benchmark/tools/compare_test.py create mode 100644 benchmark/tools/compare_test_input1.json create mode 100644 benchmark/tools/compare_test_input2.json create 
mode 100644 benchmark/tools/compare_test_input3.json diff --git a/benchmark/tools/compare.py b/benchmark/tools/compare.py index 938d7aa65a6..9dd72c12538 100755 --- a/benchmark/tools/compare.py +++ b/benchmark/tools/compare.py @@ -1,90 +1,65 @@ #!/usr/bin/env python3 +# SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +# SPDX-License-Identifier: BSD-3-Clause import sys import json import argparse import math import pandas as pd +import tabulate # for pandas markdown output +from frozendict import frozendict + -parser = argparse.ArgumentParser(description="Compare to Ginkgo benchmark outputs") -parser.add_argument( - "--outliers", action="store_true", help="List outliers from the results" -) -parser.add_argument( - "--outlier-threshold", - type=float, - default=10, - help="At what percentage of deviation (above or below) should outliers be reported", -) -parser.add_argument( - "--outlier-count", - type=int, - default=1000, - help="How many outliers should be reported per benchmark", -) -parser.add_argument("--output", choices=["json", "csv", "markdown"], default="json") -parser.add_argument("baseline") -parser.add_argument("comparison") -args = parser.parse_args() keys = {"stencil", "size", "filename", "n", "r", "k", "m"} comparison_keys = {"time", "storage", "iterations"} suffix = ".ratio" -def key_to_json(key: tuple) -> dict: - """Restore a JSON output from a key tuple""" - result = {} - for key_name, key_val in zip(keys, key): - if key_val is not None: - result[key_name] = key_val - return result - - -def key_to_str(key: tuple) -> str: - """Restore a JSON output string from a key tuple""" - mapped = key_to_json(key) - if "filename" in mapped.keys(): - return mapped["filename"] - if "stencil" in mapped.keys(): - return "stencil({}, {})".format(mapped["stencil"], mapped["size"]) - return json.dumps(mapped).replace('"', "") +def sorted_key_intersection(a: dict, b: dict) -> list: + return sorted(set(a.keys()).intersection(b.keys()), key=str) def 
parse_json_matrix(filename: str) -> dict: """Parse a JSON file into a key -> test_case dict""" - parsed = json.load(open(filename)) + with open(filename) as file: + parsed = json.load(file) result = {} assert isinstance(parsed, list) for case in parsed: assert isinstance(case, dict) assert not keys.isdisjoint(case.keys()) - dict_key = tuple(case.get(key, None) for key in keys) + dict_key = frozendict( + {key: case[key] for key in keys.intersection(case.keys())} + ) if dict_key in result.keys(): print( - "WARNING: Duplicate key {}".format(key_to_str(dict_key)), + f"WARNING: Duplicate key {json.dumps(dict_key)}", file=sys.stderr, ) - result[dict_key] = case + result[frozendict(dict_key)] = case return result def warn_on_inconsistent_keys(baseline: dict, comparison: dict, context: str): """Print a warning message for non-matching keys between baseline/comparison using the given context string""" - baseline_only = set(baseline.keys()).difference(comparison.keys()) - comparison_only = set(comparison.keys()).difference(baseline.keys()) - for dict_key in baseline_only: + baseline_only = sorted(set(baseline.keys()).difference(comparison.keys())) + comparison_only = sorted(set(comparison.keys()).difference(baseline.keys())) + for key in baseline_only: print( - "WARNING: Key {} found in baseline only in context {}".format( - key_to_str(dict_key), context - ), + f"WARNING: Key {json.dumps(key) if isinstance(key, dict) else key} found in baseline only in context {context}", file=sys.stderr, ) - for dict_key in comparison_only: + for key in comparison_only: print( - "WARNING: Key {} found in comparison only in context {}".format( - key_to_str(dict_key), context - ), + f"WARNING: Key {json.dumps(key) if isinstance(key, dict) else key} found in comparison only in context {context}", file=sys.stderr, ) + for key in sorted_key_intersection(baseline, comparison): + if isinstance(baseline[key], dict): + assert isinstance(comparison[key], dict) + warn_on_inconsistent_keys( + 
baseline[key], comparison[key], f"{context}/{key}" + ) def ratio(baseline: int | float, comparison: int | float) -> float: @@ -92,45 +67,36 @@ def ratio(baseline: int | float, comparison: int | float) -> float: return baseline / comparison -def compare_benchmark(baseline: dict, comparison: dict, context: str): +def compare_benchmark(baseline: dict, comparison: dict) -> dict: """Compares a handful of keys and component breakdowns recursively, writing them with a suffix to the output""" - warn_on_inconsistent_keys(baseline, comparison, context) result = {} - for key in baseline.keys(): - sub_context = "{}.{}".format(context, key) + for key in sorted_key_intersection(baseline, comparison): if key == "components": assert isinstance(baseline[key], dict) assert isinstance(comparison[key], dict) - warn_on_inconsistent_keys(baseline[key], comparison[key], sub_context) result[key + suffix] = { sub_key: ratio(baseline[key][sub_key], comparison[key][sub_key]) for sub_key in baseline[key] } elif isinstance(baseline[key], dict): - result[key] = compare_benchmark(baseline[key], comparison[key], sub_context) + result[key] = compare_benchmark(baseline[key], comparison[key]) elif key in comparison_keys: result[key + suffix] = ratio(baseline[key], comparison[key]) return result -def compare(baseline: dict, comparison: dict, context: str) -> dict: +def compare(baseline: dict, comparison: dict) -> dict: """Compares a test case, keeping root-level values and recursing into benchmarks""" - warn_on_inconsistent_keys(baseline, comparison, context) result = {} - for key in baseline.keys(): + for key in sorted_key_intersection(baseline, comparison): # we don't have lists on the test case root level assert not isinstance(baseline[key], list) if isinstance(baseline[key], dict): benchmark_result = {} - warn_on_inconsistent_keys( - baseline[key], comparison[key], "{}.{}".format(context, key) - ) for benchmark_name in baseline[key].keys(): if isinstance(baseline[key][benchmark_name], dict): 
comparison_result = compare_benchmark( - baseline[key][benchmark_name], - comparison[key][benchmark_name], - "{}.{}.{}".format(context, key, benchmark_name), + baseline[key][benchmark_name], comparison[key][benchmark_name] ) if len(comparison_result) > 0: benchmark_result[benchmark_name] = comparison_result @@ -140,9 +106,7 @@ def compare(baseline: dict, comparison: dict, context: str) -> dict: # everything that's not a dict should only depend on the key in the root level if baseline[key] != comparison[key]: print( - "WARNING: Inconsistent value for {}: {} != {} in context {}".format( - key, baseline[key], comparison[key], context - ), + f"WARNING: Inconsistent value for {key}: {baseline[key]} != {comparison[key]}", file=sys.stderr, ) result[key] = baseline[key] @@ -151,9 +115,9 @@ def compare(baseline: dict, comparison: dict, context: str) -> dict: def extract_benchmark_results( input: dict, benchmarks: dict, case_key: tuple, context: str | None -): +) -> None: for key, value in input.items(): - benchmark_name = key if context is None else "{}.{}".format(context, key) + benchmark_name = key if context is None else f"{context}/{key}" if key in map(lambda x: x + suffix, comparison_keys): benchmark_name = benchmark_name[: -len(suffix)] if benchmark_name not in benchmarks.keys(): @@ -163,99 +127,126 @@ def extract_benchmark_results( extract_benchmark_results(value, benchmarks, case_key, benchmark_name) -def is_outlier(value: float): +def is_outlier(value: float, args) -> bool: + """returns true iff the value exceeds 1.0 above the outlier threshold""" return math.fabs(math.log(value)) > math.log(1.0 + args.outlier_threshold / 100) -baseline_json = parse_json_matrix(args.baseline) -comparison_json = parse_json_matrix(args.comparison) -warn_on_inconsistent_keys(baseline_json, comparison_json, "root") +def compare_main(args: list): + """Runs the comparison script""" + parser = argparse.ArgumentParser(description="Compare to Ginkgo benchmark outputs") + 
parser.add_argument( + "--outliers", action="store_true", help="List outliers from the results" + ) + parser.add_argument( + "--outlier-threshold", + type=float, + default=10, + help="At what percentage of deviation (above or below) should outliers be reported", + ) + parser.add_argument( + "--outlier-count", + type=int, + default=1000, + help="How many outliers should be reported per benchmark", + ) + parser.add_argument("--output", choices=["json", "csv", "markdown"], default="json") + parser.add_argument("baseline") + parser.add_argument("comparison") + args = parser.parse_args(args) + baseline_json = parse_json_matrix(args.baseline) + comparison_json = parse_json_matrix(args.comparison) + warn_on_inconsistent_keys(baseline_json, comparison_json, "root") -results = {} + results = {} -for key in baseline_json.keys(): - results[key] = compare(baseline_json[key], comparison_json[key], key_to_str(key)) + for key in set(baseline_json.keys()).intersection(comparison_json.keys()): + results[key] = compare(baseline_json[key], comparison_json[key]) -outliers = {} -if args.outliers: + outliers = {} benchmarks = {} for key, value in results.items(): extract_benchmark_results(value, benchmarks, key, None) - for benchmark_name, benchmark_results in benchmarks.items(): - outlier = sorted( - [ - (case_key, value) - for case_key, value in benchmark_results - if is_outlier(value) - ], - key=lambda x: math.fabs(math.log(x[1])), - reverse=True, - ) - outliers[benchmark_name] = outlier[: min(len(outlier), args.outlier_count)] + if args.outliers: + for benchmark_name, benchmark_results in benchmarks.items(): + outlier = sorted( + [ + (case_key, value) + for case_key, value in benchmark_results + if is_outlier(value, args) + ], + key=lambda x: math.fabs(math.log(x[1])), + reverse=True, + ) + outliers[benchmark_name] = outlier[: min(len(outlier), args.outlier_count)] -if args.output == "json": - print( - json.dumps( - { - "results": [value for _, value in results.items()], - 
"outliers": { - key: [ - {"value": ratio_value} + key_to_json(case_key) - for (case_key, ratio_value) in value - ] - for key, value in outliers.items() - if len(value) > 0 + if args.output == "json": + print( + json.dumps( + { + "results": [value for _, value in results.items()], + "outliers": { + key: [ + {"value": ratio_value, **case_key} + for (case_key, ratio_value) in value + ] + for key, value in outliers.items() + if len(value) > 0 + }, }, - }, - indent=4, + indent=4, + ) ) - ) -else: - columns = ["benchmark", "testcase", "ratio"] - only_first = args.output == "markdown" - table = pd.DataFrame( - sum( - [ - [ - ( - key if i == 0 or not only_first else "", - key_to_str(value[0]), - value[1], - ) - for i, value in enumerate(values) - ] - for key, values in benchmarks.items() - ], - [], - ), - columns=columns, - ) - if args.output == "csv": - table.to_csv(sys.stdout, index=False) else: - table.to_markdown(sys.stdout, index=False) - if args.outliers: - outlier_table = pd.DataFrame( + columns = ["benchmark", "testcase", "ratio"] + only_first = args.output == "markdown" + table = pd.DataFrame( sum( [ [ ( key if i == 0 or not only_first else "", - key_to_str(value[0]), + json.dumps(value[0]), value[1], ) for i, value in enumerate(values) ] - for key, values in outliers.items() + for key, values in benchmarks.items() ], [], ), columns=columns, ) - if len(outlier_table) > 0: - print("\n\nOutliers") - if args.output == "csv": - outlier_table.to_csv(sys.stdout, index=False) - else: - outlier_table.to_markdown(sys.stdout, index=False) - print() + if args.output == "csv": + table.to_csv(sys.stdout, index=False) + else: + table.to_markdown(sys.stdout, index=False) + if args.outliers: + outlier_table = pd.DataFrame( + sum( + [ + [ + ( + key if i == 0 or not only_first else "", + json.dumps(value[0]), + value[1], + ) + for i, value in enumerate(values) + ] + for key, values in outliers.items() + ], + [], + ), + columns=columns, + ) + if len(outlier_table) > 0: + 
print("\n\nOutliers") + if args.output == "csv": + outlier_table.to_csv(sys.stdout, index=False) + else: + outlier_table.to_markdown(sys.stdout, index=False) + print() + + +if __name__ == "__main__": + compare(sys.argv) diff --git a/benchmark/tools/compare_test.py b/benchmark/tools/compare_test.py new file mode 100644 index 00000000000..1b906c63b45 --- /dev/null +++ b/benchmark/tools/compare_test.py @@ -0,0 +1,226 @@ +import json +import compare +import os + +dir_path = os.path.dirname(os.path.realpath(__file__)) + + +def test_mismatch(capsys): + compare.compare_main( + [ + dir_path + "/../test/reference/blas.simple.stdout", + dir_path + "/../test/reference/spmv.matrix.stdout", + ] + ) + captured = capsys.readouterr() + ref_out = {"results": [], "outliers": {}} + + ref_err = """WARNING: Key {"n": 100} found in baseline only in context root +WARNING: Key {"filename": ""} found in comparison only in context root +""" + assert json.loads(captured.out) == ref_out + assert captured.err == ref_err + + +def test_simple(capsys): + compare.compare_main( + [ + dir_path + "/../test/reference/spmv.matrix.stdout", + dir_path + "/../test/reference/spmv.matrix.stdout", + ] + ) + captured = capsys.readouterr() + ref_out = { + "results": [ + { + "cols": 36, + "filename": "", + "nonzeros": 208, + "rows": 36, + "spmv": {"coo": {"storage.ratio": 1.0, "time.ratio": 1.0}}, + } + ], + "outliers": {}, + } + + assert json.loads(captured.out) == ref_out + assert captured.err == "" + + +def test_outliers(capsys): + compare.compare_main( + [ + "--outliers", + dir_path + "/compare_test_input1.json", + dir_path + "/compare_test_input2.json", + ] + ) + captured = capsys.readouterr() + ref_out = { + "results": [ + { + "cols": 36, + "filename": "mtx", + "nonzeros": 208, + "rows": 36, + "spmv": { + "coo": {"storage.ratio": 1.0, "time.ratio": 1.2}, + "csr": {"storage.ratio": 2.0, "time.ratio": 0.8}, + "ell": {"storage.ratio": 0.5, "time.ratio": 1.0}, + "sellp": {"storage.ratio": 1.0, "time.ratio": 
1.11}, + "hybrid": {"storage.ratio": 1.0, "time.ratio": 1.01}, + }, + } + ], + "outliers": { + "spmv/coo/time": [{"value": 1.2, "filename": "mtx"}], + "spmv/csr/storage": [{"value": 2.0, "filename": "mtx"}], + "spmv/csr/time": [{"value": 0.8, "filename": "mtx"}], + "spmv/ell/storage": [{"value": 0.5, "filename": "mtx"}], + "spmv/sellp/time": [{"value": 1.11, "filename": "mtx"}], + }, + } + + assert json.loads(captured.out) == ref_out + assert captured.err == "" + + +def test_outliers_limited(capsys): + compare.compare_main( + [ + "--outliers", + "--outlier-count", + "0", + dir_path + "/compare_test_input1.json", + dir_path + "/compare_test_input2.json", + ] + ) + captured = capsys.readouterr() + ref_out = { + "results": [ + { + "cols": 36, + "filename": "mtx", + "nonzeros": 208, + "rows": 36, + "spmv": { + "coo": {"storage.ratio": 1.0, "time.ratio": 1.2}, + "csr": {"storage.ratio": 2.0, "time.ratio": 0.8}, + "ell": {"storage.ratio": 0.5, "time.ratio": 1.0}, + "sellp": {"storage.ratio": 1.0, "time.ratio": 1.11}, + "hybrid": {"storage.ratio": 1.0, "time.ratio": 1.01}, + }, + } + ], + "outliers": {}, + } + + assert json.loads(captured.out) == ref_out + assert captured.err == "" + + +def test_csv(capsys): + compare.compare_main( + [ + "--outliers", + "--output", + "csv", + dir_path + "/compare_test_input1.json", + dir_path + "/compare_test_input2.json", + ] + ) + captured = capsys.readouterr() + ref_out = """benchmark,testcase,ratio +spmv/coo/storage,"{""filename"": ""mtx""}",1.0 +spmv/coo/time,"{""filename"": ""mtx""}",1.2 +spmv/csr/storage,"{""filename"": ""mtx""}",2.0 +spmv/csr/time,"{""filename"": ""mtx""}",0.8 +spmv/ell/storage,"{""filename"": ""mtx""}",0.5 +spmv/ell/time,"{""filename"": ""mtx""}",1.0 +spmv/sellp/storage,"{""filename"": ""mtx""}",1.0 +spmv/sellp/time,"{""filename"": ""mtx""}",1.11 +spmv/hybrid/storage,"{""filename"": ""mtx""}",1.0 +spmv/hybrid/time,"{""filename"": ""mtx""}",1.01 + + +Outliers +benchmark,testcase,ratio +spmv/coo/time,"{""filename"": 
""mtx""}",1.2 +spmv/csr/storage,"{""filename"": ""mtx""}",2.0 +spmv/csr/time,"{""filename"": ""mtx""}",0.8 +spmv/ell/storage,"{""filename"": ""mtx""}",0.5 +spmv/sellp/time,"{""filename"": ""mtx""}",1.11 + +""" + assert captured.out == ref_out + assert captured.err == "" + + +def test_md(capsys): + compare.compare_main( + [ + "--outliers", + "--output", + "markdown", + dir_path + "/compare_test_input1.json", + dir_path + "/compare_test_input2.json", + ] + ) + captured = capsys.readouterr() + ref_out = """| benchmark | testcase | ratio | +|:--------------------|:--------------------|--------:| +| spmv/coo/storage | {"filename": "mtx"} | 1 | +| spmv/coo/time | {"filename": "mtx"} | 1.2 | +| spmv/csr/storage | {"filename": "mtx"} | 2 | +| spmv/csr/time | {"filename": "mtx"} | 0.8 | +| spmv/ell/storage | {"filename": "mtx"} | 0.5 | +| spmv/ell/time | {"filename": "mtx"} | 1 | +| spmv/sellp/storage | {"filename": "mtx"} | 1 | +| spmv/sellp/time | {"filename": "mtx"} | 1.11 | +| spmv/hybrid/storage | {"filename": "mtx"} | 1 | +| spmv/hybrid/time | {"filename": "mtx"} | 1.01 | + +Outliers +| benchmark | testcase | ratio | +|:-----------------|:--------------------|--------:| +| spmv/coo/time | {"filename": "mtx"} | 1.2 | +| spmv/csr/storage | {"filename": "mtx"} | 2 | +| spmv/csr/time | {"filename": "mtx"} | 0.8 | +| spmv/ell/storage | {"filename": "mtx"} | 0.5 | +| spmv/sellp/time | {"filename": "mtx"} | 1.11 | +""" + assert captured.out == ref_out + assert captured.err == "" + + +def test_complex(capsys): + compare.compare_main( + [ + dir_path + "/compare_test_input3.json", + dir_path + "/compare_test_input3.json", + ] + ) + captured = capsys.readouterr() + ref_out = { + "results": [ + {"blas": {"axpy": {"time.ratio": 1.0}}, "k": 2, "m": 3, "n": 1, "r": 4}, + {"size": 100, "spmv": {"csr": {"time.ratio": 1.0}}, "stencil": "7pt"}, + { + "filename": "mtx", + "solver": { + "gmres": { + "apply": { + "components.ratio": {"foo": 1.0}, + "iterations.ratio": 1.0, + "time.ratio": 
1.0, + }, + "generate": {"time.ratio": 1.0}, + } + }, + }, + ], + "outliers": {}, + } + + assert json.loads(captured.out) == ref_out + assert captured.err == "" diff --git a/benchmark/tools/compare_test_input1.json b/benchmark/tools/compare_test_input1.json new file mode 100644 index 00000000000..da7b190c270 --- /dev/null +++ b/benchmark/tools/compare_test_input1.json @@ -0,0 +1,48 @@ +[ + { + "filename": "mtx", + "spmv": { + "coo": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.2, + "repetitions": 10, + "completed": true + }, + "csr": { + "storage": 2000, + "max_relative_norm2": 1.0, + "time": 0.8, + "repetitions": 10, + "completed": true + }, + "ell": { + "storage": 500, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.11, + "repetitions": 10, + "completed": true + }, + "hybrid": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.01, + "repetitions": 10, + "completed": true + } + }, + "rows": 36, + "cols": 36, + "nonzeros": 208, + "optimal": { + "spmv": "coo" + } + } +] \ No newline at end of file diff --git a/benchmark/tools/compare_test_input2.json b/benchmark/tools/compare_test_input2.json new file mode 100644 index 00000000000..29a8d348618 --- /dev/null +++ b/benchmark/tools/compare_test_input2.json @@ -0,0 +1,48 @@ +[ + { + "filename": "mtx", + "spmv": { + "coo": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "csr": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "ell": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "sellp": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + }, + "hybrid": { + "storage": 1000, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 
10, + "completed": true + } + }, + "rows": 36, + "cols": 36, + "nonzeros": 208, + "optimal": { + "spmv": "coo" + } + } +] \ No newline at end of file diff --git a/benchmark/tools/compare_test_input3.json b/benchmark/tools/compare_test_input3.json new file mode 100644 index 00000000000..f317073d12d --- /dev/null +++ b/benchmark/tools/compare_test_input3.json @@ -0,0 +1,39 @@ +[ + { + "stencil": "7pt", + "size": 100, + "spmv": { + "csr": { + "time": 0.5 + } + } + }, + { + "n": 1, + "k": 2, + "m": 3, + "r": 4, + "blas": { + "axpy": { + "time": 100 + } + } + }, + { + "filename": "mtx", + "solver": { + "gmres": { + "apply": { + "time": 1.0, + "components": { + "foo": 2.0 + }, + "iterations": 10 + }, + "generate": { + "time": 2.0 + } + } + } + } +] \ No newline at end of file From b60a34da75715482b28960b8ba9f8dcaa4007230 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 21 Nov 2023 04:39:53 +0100 Subject: [PATCH 5/7] fix script mode --- benchmark/tools/compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/tools/compare.py b/benchmark/tools/compare.py index 9dd72c12538..8de109fe34a 100755 --- a/benchmark/tools/compare.py +++ b/benchmark/tools/compare.py @@ -249,4 +249,4 @@ def compare_main(args: list): if __name__ == "__main__": - compare(sys.argv) + compare_main(sys.argv) From 8fecafbfa8a222bc04f2cf730a05baa5fe705f55 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Wed, 22 Nov 2023 10:37:32 +0100 Subject: [PATCH 6/7] more stable output --- benchmark/tools/compare.py | 2 +- benchmark/tools/compare_test.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/tools/compare.py b/benchmark/tools/compare.py index 8de109fe34a..e0f79e90988 100755 --- a/benchmark/tools/compare.py +++ b/benchmark/tools/compare.py @@ -160,7 +160,7 @@ def compare_main(args: list): results = {} - for key in set(baseline_json.keys()).intersection(comparison_json.keys()): + for key in sorted_key_intersection(baseline_json, 
comparison_json): results[key] = compare(baseline_json[key], comparison_json[key]) outliers = {} diff --git a/benchmark/tools/compare_test.py b/benchmark/tools/compare_test.py index 1b906c63b45..83e2ee5dbda 100644 --- a/benchmark/tools/compare_test.py +++ b/benchmark/tools/compare_test.py @@ -203,8 +203,6 @@ def test_complex(capsys): captured = capsys.readouterr() ref_out = { "results": [ - {"blas": {"axpy": {"time.ratio": 1.0}}, "k": 2, "m": 3, "n": 1, "r": 4}, - {"size": 100, "spmv": {"csr": {"time.ratio": 1.0}}, "stencil": "7pt"}, { "filename": "mtx", "solver": { @@ -218,6 +216,8 @@ def test_complex(capsys): } }, }, + {"blas": {"axpy": {"time.ratio": 1.0}}, "k": 2, "m": 3, "n": 1, "r": 4}, + {"size": 100, "spmv": {"csr": {"time.ratio": 1.0}}, "stencil": "7pt"}, ], "outliers": {}, } From c26f69fe65a9eea60566c50f89846ccb661d03de Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 27 Nov 2023 14:50:58 +0100 Subject: [PATCH 7/7] review update --- benchmark/tools/compare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/tools/compare.py b/benchmark/tools/compare.py index e0f79e90988..f6ac5ae321a 100755 --- a/benchmark/tools/compare.py +++ b/benchmark/tools/compare.py @@ -36,7 +36,7 @@ def parse_json_matrix(filename: str) -> dict: f"WARNING: Duplicate key {json.dumps(dict_key)}", file=sys.stderr, ) - result[frozendict(dict_key)] = case + result[dict_key] = case return result @@ -128,7 +128,7 @@ def extract_benchmark_results( def is_outlier(value: float, args) -> bool: - """returns true iff the value exceeds 1.0 above the outlier threshold""" + """returns true iff the value is more than the outlier threshold away from 1.0""" return math.fabs(math.log(value)) > math.log(1.0 + args.outlier_threshold / 100)