diff --git a/demo/.gitignore b/demo/.gitignore
new file mode 100644
index 00000000000000..917660a34812b7
--- /dev/null
+++ b/demo/.gitignore
@@ -0,0 +1 @@
+*.wasm
\ No newline at end of file
diff --git a/demo/benchmarks/README.md b/demo/benchmarks/README.md
new file mode 100644
index 00000000000000..7bb1d7b47717e8
--- /dev/null
+++ b/demo/benchmarks/README.md
@@ -0,0 +1,24 @@
+# Benchmarking
+
+I used this Python script to create the benchmarks for my B.Sc. thesis.
+You can find more information on the input code, and on why I chose these benchmarks, in the `Evaluation` chapter of my thesis at: https://github.com/TUM-DSE/research-work-archive/blob/main/archive/2023/summer/docs/bsc_rehde_hardware_assisted_memory_safety_for_webassembly.pdf
+
+# Convert Input C/LLVM IR Code to WASM
+
+To generate the input .wasm files that are later provided to the Python script, first compile the C or LLVM IR files to wasm64 with clang.
+Note that you will have to adjust some paths in the Makefiles, since they need to point to e.g. our custom clang or wasm_memsafety_rtlib.c.
+Also note that the PolybenchC .wasm files have to be generated elsewhere; this directory only contains some smaller custom tests.
+
+# Performing Benchmarks with Python Script
+
+This script can execute benchmarks and also evaluate them (i.e. process the data and create a matplotlib chart).
+
+The script expects you to first create a `base_dir`, a working directory in which you place the input (.wasm) files and into which output files will be written.
+
+Expected (input) file structure to run benchmarks:
+- The `{base_dir}/wasmtime` directory should contain all wasmtime variants (e.g. `{base_dir}/wasmtime/wasmtime-mte-naive`) that you later specify on the command line.
+- The `{base_dir}/{benchmark_suite}/build` directory should contain all input wasm files in subdirectories named after the wasmtime variant (e.g. `{base_dir}/{benchmark_suite}/build/wasmtime-mte-naive`).
+
+Note that this Python script has to be executed in QEMU with MTE enabled, since it is meant to run MTE-enabled wasmtime binaries.
+
+To see all possible and required command-line options, run the script without any options: `python3 benchmarks-script.py`. A full invocation might look like `python3 benchmarks-script.py --base-dir ~/bench --benchmark-suite sorting --wasmtime-binaries wasmtime-none wasmtime-mte-naive --compile --performance --evaluate` (the base dir here is just an illustrative example).
diff --git a/demo/benchmarks/benchmarks-script.py b/demo/benchmarks/benchmarks-script.py
new file mode 100644
index 00000000000000..123162b46af46e
--- /dev/null
+++ b/demo/benchmarks/benchmarks-script.py
@@ -0,0 +1,757 @@
+import argparse
+import subprocess
+import os
+from datetime import datetime
+from concurrent.futures import ThreadPoolExecutor
+import csv
+import statistics
+
+# Expected (input) file structure to run benchmarks:
+# - The `{base_dir}/wasmtime` directory should contain all wasmtime variants (e.g. `{base_dir}/wasmtime/wasmtime-mte-naive`)
+# - The `{base_dir}/{benchmark_suite}/build` directory should contain all input wasm files in subdirectories named after the wasmtime variant (e.g. `{base_dir}/{benchmark_suite}/build/wasmtime-mte-naive`)
+
+# Global state
+base_dir = None
+benchmark_suite = None
+wasm_tools_binary = None
+results_dir = None
+wasm_dir = None
+cwasm_dir = None
+all_processed_results_dir = None
+all_final_results_dir = None
+chart_results_dir = None
+wasmtime_binaries_dir = None
+
+# CLI INTERFACE
+def main():
+    parser = argparse.ArgumentParser(
+        description="Execute and evaluate benchmarks for the SafeWASM project")
+
+    all_wasmtime_binaries = ["wasmtime-none", "wasmtime-mte-infra-only", "wasmtime-mte-no-tagging", "wasmtime-mte-naive", "wasmtime-mte-opt", "wasmtime-mte-opt-static", "wasmtime-mte-async", "wasmtime-pac"]
+    default_wasmtime_binaries = [x for x in all_wasmtime_binaries if x == "wasmtime-pac"]
+
+    parser.add_argument('--base-dir', required=True,
+                        help='The base/working directory, in which input files are expected and output files will be written')
+
+    parser.add_argument('--benchmark-suite', choices=['polybench', 'sorting', 'pac', 'memory-tagging'], required=True,
+                        help='The benchmark suite to run')
+
+    parser.add_argument('--wasmtime-binaries', nargs='+', choices=all_wasmtime_binaries, required=True,
+                        help='All wasmtime executables/binaries for which the benchmarks should be executed')
+
+    parser.add_argument('--wasm-tools-binary',
+                        help='A path to the wasm-tools binary, required for counting new instructions in .wasm files')
+
+    parser.add_argument('--compile', action='store_true',
+                        help='Compile all .wasm files to .cwasm using the specified wasmtime binaries')
+
+    parser.add_argument('--performance', action='store_true',
+                        help='Execute all performance benchmarks using the specified wasmtime binaries')
+
+    parser.add_argument('--binary-size', action='store_true',
+                        help='Execute all binary size benchmarks using the specified wasmtime binaries')
+
+    parser.add_argument('--count-insts', action='store_true',
+                        help='Execute all instruction counting benchmarks using the specified wasmtime binaries')
+
+    parser.add_argument('--evaluate', action='store_true',
+                        help='Evaluate the raw benchmark data, i.e. process it and generate a chart')
+
+    args = parser.parse_args()
+
+    # Initialize global state
+    global base_dir, benchmark_suite, wasm_tools_binary, results_dir, wasm_dir, cwasm_dir, all_processed_results_dir, all_final_results_dir, chart_results_dir, wasmtime_binaries_dir
+
+    base_dir = args.base_dir
+    benchmark_suite = args.benchmark_suite
+    wasm_tools_binary = args.wasm_tools_binary
+
+    # Directories where input files are read from and output files are written to
+    results_dir = os.path.join(base_dir, f'{benchmark_suite}/results')
+    wasm_dir = os.path.join(base_dir, f'{benchmark_suite}/build')
+    cwasm_dir = os.path.join(base_dir, f'{benchmark_suite}/compiled')
+    all_processed_results_dir = os.path.join(base_dir, f'{benchmark_suite}/all-processed-results')
+    all_final_results_dir = os.path.join(base_dir, f'{benchmark_suite}/all-final-results')
+    chart_results_dir = os.path.join(base_dir, f'{benchmark_suite}/chart-results')
+    wasmtime_binaries_dir = os.path.join(base_dir, 'wasmtime')
+
+    # Create all directories so they can be filled later
+    for directory in [results_dir, wasm_dir, cwasm_dir,
+                      all_processed_results_dir, all_final_results_dir, chart_results_dir]:
+        os.makedirs(directory, exist_ok=True)
+
+
+    for wasmtime_executable in args.wasmtime_binaries:
+        if args.compile:
+            compile_all(wasmtime_executable)
+
+        if args.performance:
+            match benchmark_suite:
+                case "sorting":
+                    complete_perf_sorting_run(wasmtime_executable)
+                case _:
+                    complete_perf_run(wasmtime_executable)
+
+        if args.binary_size:
+            binary_size_complete_run(wasmtime_executable)
+
+        if args.count_insts:
+            count_new_instructions_complete_run(wasmtime_executable)
+
+        if args.evaluate:
+            generate_all_processed_results()
+            generate_all_final_results()
+            generate_all_charts()
+
+
+# Unfortunately, branch-misses, branches and cache-misses are not supported inside QEMU
+perf_metrics = "cycles,task-clock,page-faults,context-switches"
+
+# Number of times to run each command
+num_runs = 5
+
+
+def get_wasm_files(wasmtime_executable):
+    """Helper to return a list of all input wasm files belonging to a wasmtime variant"""
+
+    sub_dir = os.path.join(wasm_dir, wasmtime_executable)
+    return [os.path.join(sub_dir, file) for file in os.listdir(sub_dir)]
+
+
+def get_cwasm_files(wasmtime_executable):
+    """Helper to return a list of all input (pre-compiled) cwasm files belonging to a wasmtime variant"""
+
+    sub_dir = os.path.join(cwasm_dir, wasmtime_executable)
+    return [os.path.join(sub_dir, file) for file in os.listdir(sub_dir)]
+
+
+def get_wasmtime_command(wasmtime_executable, command, extra_options, file):
+    """Helper to add all necessary flags to a wasmtime invocation (command can be 'run' or 'compile')"""
+
+    if wasmtime_executable == 'wasmtime-none':
+        wasmtime_command = [os.path.join(wasmtime_binaries_dir, wasmtime_executable),
+                            command,
+                            "--wasm-features=memory64"] + extra_options + [file]
+    else:
+        wasmtime_command = [os.path.join(wasmtime_binaries_dir, wasmtime_executable),
+                            command, "--cranelift-enable", "use_mte",
+                            "--wasm-features=memory64,mem-safety"] + extra_options + [file]
+    return wasmtime_command
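+
+# Example (illustrative, assuming a hypothetical base_dir of /bench):
+#   get_wasmtime_command('wasmtime-mte-naive', 'run', ['--allow-precompiled'], 'a.cwasm')
+# returns
+#   ['/bench/wasmtime/wasmtime-mte-naive', 'run', '--cranelift-enable', 'use_mte',
+#    '--wasm-features=memory64,mem-safety', '--allow-precompiled', 'a.cwasm']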
"""Compiles all wasm files, which belong to a certain wasmtime executable, to cwasm files""" + + wasm_files = get_wasm_files(wasmtime_executable) + + dst_dir = os.path.join(cwasm_dir, wasmtime_executable) + os.makedirs(dst_dir, exist_ok=True) + os.chdir(dst_dir) + + with ThreadPoolExecutor(max_workers=len(wasm_files)) as executor: + for wasm_file in wasm_files: + executor.submit(compile_file, wasmtime_executable, wasm_file) + + +# === PERFORMANCE (CPU CYCLES) BENCHMARKING +def single_perf_run(iteration, wasmtime_executable, cwasm_file): + """Perform a single perf benchmark run. This function simply executes the cwasm binary, without any extra stdin/arguments provided to it""" + + perf_results_dir = os.path.join(results_dir, 'perf') + dst_dir = os.path.join(perf_results_dir, wasmtime_executable) + os.makedirs(dst_dir, exist_ok=True) + + dst_file = f'perf__{wasmtime_executable}__{os.path.basename(cwasm_file)}__iter_{str(iteration)}.txt' + perf_results_file = os.path.join(dst_dir, dst_file) + + # Construct the perf command + wasmtime_run_command = get_wasmtime_command( + wasmtime_executable, 'run', ['--allow-precompiled'], cwasm_file) + perf_command = ["perf", "stat", "-e", perf_metrics, + "-o", perf_results_file, "--"] + wasmtime_run_command + + current_time = datetime.now() + print(f'Time : {current_time}') + print(f"Executing: {perf_command}") + + subprocess.run(perf_command) + + print(f"Results :") + with open(perf_results_file, 'r') as file: + print(file.read()) + + +def complete_perf_run(wasmtime_executable): + """Use a specified wasmtime binary to run all benchmarks""" + + cwasm_files = get_cwasm_files(wasmtime_executable) + + for cwasm_file in cwasm_files: + print(f"=== Combination: {wasmtime_executable} {cwasm_file}") + + for i in range(num_runs): + single_perf_run(i, wasmtime_executable, cwasm_file) + + +# === BINARY SIZE BENCHMARKING +def binary_size_complete_run(wasmtime_executable): + """Find the binary sizes of .wasm and .cwasm files for a certain wasmtime binary""" + + dst_dir_binary_sizes = os.path.join(results_dir, 'binary-sizes') + os.makedirs(dst_dir_binary_sizes, exist_ok=True) + + # Get WASM (.wasm) binary sizes + wasm_dir = os.path.join(dst_dir_binary_sizes, 'wasm', wasmtime_executable) + os.makedirs(wasm_dir, exist_ok=True) + + for wasm_file in get_wasm_files(wasmtime_executable): + file_size = os.path.getsize(wasm_file) + + output_filename = f"binary_size__wasm__{wasmtime_executable}__{os.path.basename(wasm_file)}.txt" + output_path = os.path.join(wasm_dir, output_filename) + + print(f'Calculating wasm binary size: {output_path}') + + with open(output_path, 'w') as f: + f.write(str(file_size)) + + # Get AArch64 binary (.cwasm) sizes + cwasm_dir = os.path.join(dst_dir_binary_sizes, + 'cwasm', wasmtime_executable) + os.makedirs(cwasm_dir, exist_ok=True) + + for cwasm_file in get_cwasm_files(wasmtime_executable): + file_size = os.path.getsize(cwasm_file) + + output_filename = f"binary_size__cwasm__{wasmtime_executable}__{os.path.basename(cwasm_file)}.txt" + output_path = os.path.join(cwasm_dir, output_filename) + + print(f'Calculating cwasm binary size: {output_path}') + + with open(output_path, 'w') as f: + f.write(str(file_size)) + + +# === NUMBER OF NEW WASM INSTRUCTIONS +def count_new_instructions_complete_run(wasmtime_executable): + """Count the number of new instructions in the .wasm and .cwasm files. 'New' instructions are those from our extended wasm (e.g. 
+
+
+# === NUMBER OF NEW WASM INSTRUCTIONS
+def count_new_instructions_complete_run(wasmtime_executable):
+    """Count the number of new instructions in the .wasm and .cwasm files. 'New' instructions are those from our extended wasm (e.g. segment.new, segment.free etc.)"""
+
+    dst_dir = os.path.join(results_dir, 'count-insts')
+    os.makedirs(dst_dir, exist_ok=True)
+
+    # WASM (.wasm)
+    wasm_counts_dir = os.path.join(dst_dir, 'wasm', wasmtime_executable)
+    os.makedirs(wasm_counts_dir, exist_ok=True)
+
+    for wasm_file in get_wasm_files(wasmtime_executable):
+        # Convert .wasm to the text format with wasm-tools
+        wasm_output = subprocess.check_output(
+            [wasm_tools_binary, 'print', wasm_file], text=True)
+
+        # Define a dictionary to store instruction counts
+        instruction_counts = {
+            "segment.new": wasm_output.count("segment.stack_new"),
+            "segment.free": wasm_output.count("segment.free") + wasm_output.count("segment.stack_free"),
+            "i64.pointer_sign": wasm_output.count("i64.pointer_sign"),
+            "i64.pointer_auth": wasm_output.count("i64.pointer_auth")
+        }
+
+        # Save results to file
+        output_filename = f"count_wasm__{wasmtime_executable}__{os.path.basename(wasm_file)}.txt"
+        output_path = os.path.join(wasm_counts_dir, output_filename)
+
+        print(f'Counting wasm insts: {output_path}')
+
+        with open(output_path, 'w') as f:
+            for instruction, count in instruction_counts.items():
+                f.write(f"{instruction}: {count}\n")
+
+    # AArch64 binary (.cwasm)
+    cwasm_counts_dir = os.path.join(dst_dir, 'cwasm', wasmtime_executable)
+    os.makedirs(cwasm_counts_dir, exist_ok=True)
+
+    for cwasm_file in get_cwasm_files(wasmtime_executable):
+        # Disassemble the AArch64 code with objdump
+        objdump_output = subprocess.check_output(
+            ['objdump', '-D', cwasm_file], text=True)
+
+        # Define a dictionary to store instruction counts
+        instruction_counts = {
+            "irg": objdump_output.count("irg\t"),
+            "stg": objdump_output.count("stg\t"),
+            "st2g": objdump_output.count("st2g\t"),
+            "pacdza": objdump_output.count("pacdza\t"),
+            "autdza": objdump_output.count("autdza\t")
+        }
+
+        # Save results to file
+        output_filename = f"count_cwasm__{wasmtime_executable}__{os.path.basename(cwasm_file)}.txt"
+        output_path = os.path.join(cwasm_counts_dir, output_filename)
+
+        print(f'Counting cwasm insts: {output_path}')
+
+        with open(output_path, 'w') as f:
+            for instruction, count in instruction_counts.items():
+                f.write(f"{instruction}: {count}\n")
+
+
+# === SORTING BENCHMARKING
+# Unsorted input array size
+unsorted_input_array_size = 40000
+data = list(range(unsorted_input_array_size, 0, -1))
+
+# TODO: we could abstract this (by passing stdin and the arguments as parameters), since most of this code is duplicated
+def single_perf_sorting_run(iteration, wasmtime_executable, cwasm_file):
+    """A special version of the performance benchmark that provides arguments and stdin to the wasmtime call"""
+
+    perf_results_dir = os.path.join(results_dir, 'perf')
+    dst_dir = os.path.join(perf_results_dir, wasmtime_executable)
+    os.makedirs(dst_dir, exist_ok=True)
+
+    dst_file = f'perf__{wasmtime_executable}__{os.path.basename(cwasm_file)}__iter_{str(iteration)}.txt'
+    perf_results_file = os.path.join(dst_dir, dst_file)
+
+    # Convert the list of integers to a newline-separated string
+    input_data = "\n".join(map(str, data)) + "\n"
+
+    # Construct the perf command
+    wasmtime_command = get_wasmtime_command(wasmtime_executable, 'run', [
+        '--allow-precompiled'], cwasm_file) + [str(len(data))]
+    perf_command = ["perf", "stat", "-e", perf_metrics,
+                    "-o", perf_results_file, "--"] + wasmtime_command
+
+    current_time = datetime.now()
+    print(f'Time : {current_time}')
+    print(f"Executing: {perf_command}")
+
+    # Run the sorter binary with the array length as an argument and input_data as stdin
+    result = subprocess.run(perf_command, input=input_data,
+                            text=True, capture_output=True)
+
+    # Check for errors
+    if result.returncode != 0:
+        print(f"Error: {result.stderr}")
+
+    print("Results :")
+    with open(perf_results_file, 'r') as file:
+        print(file.read())
+
+
+def complete_perf_sorting_run(wasmtime_executable):
+    """Perform all sorting benchmarks"""
+
+    cwasm_files = get_cwasm_files(wasmtime_executable)
+
+    for cwasm_file in cwasm_files:
+        print(f"Performance benchmarking: {wasmtime_executable} {cwasm_file}")
+
+        for i in range(num_runs):
+            single_perf_sorting_run(i, wasmtime_executable, cwasm_file)
+
+
+# NOTE: in tests, this didn't lead to the expected results (all memory usage was basically equal, even though we'd expect MTE to use more; maybe this is a problem with QEMU+MTE)
+# == PEAK MEMORY USED
+def single_memory_sorting_run(iteration, wasmtime_executable, cwasm_file):
+    """Measure the peak memory used by the input program"""
+
+    memory_results_dir = os.path.join(results_dir, 'memory')
+    dst_dir = os.path.join(memory_results_dir, wasmtime_executable)
+    os.makedirs(dst_dir, exist_ok=True)
+
+    dst_file = f'memory__{wasmtime_executable}__{os.path.basename(cwasm_file)}__iter_{str(iteration)}.txt'
+    memory_results_file = os.path.join(dst_dir, dst_file)
+
+    # Convert the list of integers to a newline-separated string
+    input_data = "\n".join(map(str, data)) + "\n"
+
+    # Construct the time command
+    wasmtime_command = get_wasmtime_command(wasmtime_executable, 'run', [
+        '--allow-precompiled'], cwasm_file) + [str(len(data))]
+    time_command = ["/usr/bin/time", "-v", "-o",
+                    memory_results_file] + wasmtime_command
+
+    current_time = datetime.now()
+    print(f'Time : {current_time}')
+    print(f"Executing: {time_command}")
+
+    # Run the sorter binary with the array length as an argument and input_data as stdin
+    result = subprocess.run(time_command, input=input_data,
+                            text=True, capture_output=True)
+
+    # Check for errors
+    if result.returncode != 0:
+        print(f"Error: {result.stderr}")
+
+    print("Results :")
+    with open(memory_results_file, 'r') as file:
+        print(file.read())
+
+
+def complete_memory_sorting_run(wasmtime_executable):
+    """Measure the peak memory for all wasm programs belonging to a wasmtime binary"""
+
+    cwasm_files = get_cwasm_files(wasmtime_executable)
+
+    for cwasm_file in cwasm_files:
+        print(f"Memory benchmarking: {wasmtime_executable} {cwasm_file}")
+
+        for i in range(num_runs):
+            single_memory_sorting_run(i, wasmtime_executable, cwasm_file)
+
+
+# == PROCESSING BENCHMARKS
+
+def get_input_programs():
+    match benchmark_suite:
+        case "sorting":
+            return ["bubble_sort",
+                    "merge_sort", "modified_merge_sort"]
+        case "pac":
+            return ["pac-1", "pac-2", "pac-3", "pac-4", "pac-5"]
+        case "memory-tagging":
+            return ["tagging-few-loops-large-segments", "tagging-few-loops-small-segments",
+                    "tagging-many-loops-small-segments", "tagging-many-loops-large-segments"]
+        case "polybench":
+            return [
+                "correlation-san",
+                "2mm-san",
+                "deriche-san",
+                "adi-san",
+            ]
+
+
+def parse_perf_file(content):
+    """Parse a perf results file to get the CPU cycle count and the elapsed time"""
+    lines = content.split("\n")
+
+    cycles = None
+    elapsed_time = None
+
+    for line in lines:
+        stripped_line = line.strip()
+
+        if "cycles" in stripped_line:
+            # Split the line by whitespace and take the first item
+            cycles = int(stripped_line.split()[0].replace(',', ''))
+
+        if "seconds time elapsed" in stripped_line:
+            # Split the line by whitespace and take the first item
+            elapsed_time = float(stripped_line.split()[0])
+
+    return cycles, elapsed_time
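+
+# Example (illustrative, hypothetical numbers): given typical `perf stat`
+# output containing lines such as
+#
+#          1,234,567      cycles
+#       0.123456789 seconds time elapsed
+#
+# parse_perf_file returns (1234567, 0.123456789).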
+
+
+def generate_cpu_cycles_processed_results(input_programs):
+    # cpu-cycles.csv:
+    # wasmtime-variant,bubble_sort_0,...,bubble_sort_4,merge_sort_0,...,merge_sort_4,modified_merge_sort_0,...,modified_merge_sort_4
+    # ...data...
+    # ...
+
+    print("Generating processed cpu cycles results")
+
+    perf_dir = os.path.join(results_dir, 'perf')
+    # Get all subdirectories in perf, which are the `wasmtime-*` variants
+    variants = [d for d in os.listdir(
+        perf_dir) if os.path.isdir(os.path.join(perf_dir, d))]
+
+    header = ["wasmtime-variant"]
+    for input_program in input_programs:
+        for i in range(num_runs):
+            header.append(f"{input_program}_{i}")
+
+    results = []
+
+    # Iterate over subdirectories, i.e. wasmtime variants
+    for variant in variants:
+        variant_path = os.path.join(perf_dir, variant)
+        variant_results = [variant]  # Start with the variant name
+        for input_program in input_programs:
+            for i in range(num_runs):
+                file_name = f"perf__{variant}__{input_program}.cwasm__iter_{i}.txt"
+                with open(os.path.join(variant_path, file_name), 'r') as f:
+                    content = f.read()
+                    # TODO: for some benchmark suites/qemu environments, we'll have to fall back to the elapsed time if the cycles aren't available
+                    cycles, elapsed_time = parse_perf_file(content)
+                    variant_results.append(cycles)
+        results.append(variant_results)
+
+    # Write results to CSV
+    output_file = os.path.join(all_processed_results_dir, 'cpu-cycles.csv')
+    with open(output_file, 'w', newline='') as csvfile:
+        writer = csv.writer(csvfile)
+        writer.writerow(header)
+        for row in results:
+            writer.writerow(row)
+
+
+def generate_all_processed_results():
+    input_programs = get_input_programs()
+    generate_cpu_cycles_processed_results(input_programs)
+
+
+def generate_cpu_cycles_final_result(input_programs):
+    # wasmtime-variant,bubble_sort_mean,bubble_sort_stdev,merge_sort_mean,merge_sort_stdev,modified_merge_sort_mean,modified_merge_sort_stdev
+    # ...data...
+    # ...
+
+    print("Generating final cpu cycles results")
+
+    # Read the data
+    processed_result_file = os.path.join(
+        all_processed_results_dir, 'cpu-cycles.csv')
+    data = []
+    with open(processed_result_file, 'r') as csvfile:
+        reader = csv.reader(csvfile)
+        for row in reader:
+            data.append(row)
+
+    header = data[0][1:]
+    variants = [row[0] for row in data[1:]]
+
+    # Calculate mean and standard deviation
+    final_data = [['wasmtime-variant']]
+    for input_program in input_programs:
+        final_data[0].extend(
+            [f"{input_program}_mean", f"{input_program}_stdev"])
+
+    for i, variant in enumerate(variants):
+        row_data = [variant]
+        for j in range(1, len(header), num_runs):  # num_runs iterations for each input program
+            # values = [int(data[i + 1][j + k]) for k in range(num_runs)]
+            values = [float(data[i + 1][j + k]) for k in range(num_runs)]
+            mean = statistics.mean(values)
+            std_dev = statistics.stdev(values)
+            row_data.extend([mean, std_dev])
+        final_data.append(row_data)
+
+    # Write to the final results file
+    output_file = os.path.join(all_final_results_dir, 'cpu-cycles.csv')
+    with open(output_file, 'w', newline='') as csvfile:
+        writer = csv.writer(csvfile)
+        writer.writerows(final_data)
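+
+# Illustrative aggregation (hypothetical numbers): five iteration values such
+# as [100, 102, 98, 101, 99] collapse into mean 100 and a sample standard
+# deviation (statistics.stdev) of ~1.58 in the final CSV.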
+
+
+def generate_all_final_results():
+    # One file per metric (cpu-cycles, binary-size, count-instructions); each should roughly look like this (csv format):
+    # wasmtime-variant,bubble_sort_mean,bubble_sort_stdev,merge_sort_mean,merge_sort_stdev,modified_merge_sort_mean,modified_merge_sort_stdev
+    # ...data...
+    # ...
+
+    input_programs = get_input_programs()
+    generate_cpu_cycles_final_result(input_programs)
+
+
+def generate_cpu_cycles_chart(input_file, output_image_path):
+    import matplotlib.pyplot as plt
+    import pandas as pd
+    import numpy as np
+
+    print("Generating chart")
+
+    # TUM Colors
+    TUMBlue = "#0065BD"
+    TUMSecondaryBlue = "#005293"
+    TUMSecondaryBlue2 = "#003359"
+    TUMBlack = "#000000"
+    TUMWhite = "#FFFFFF"
+    TUMDarkGray = "#333333"
+    TUMGray = "#808080"
+    TUMLightGray = "#CCCCC6"
+    TUMAccentGray = "#DAD7CB"
+    TUMAccentOrange = "#E37222"
+    TUMAccentGreen = "#A2AD00"
+    TUMAccentLightBlue = "#98C6EA"
+    TUMAccentBlue = "#64A0C8"
+    colors = [TUMBlue, TUMAccentOrange,
+              TUMAccentGreen, TUMSecondaryBlue2, TUMDarkGray]
+
+    def darken_color(color_code, factor=0.6):
+        """Return a darkened version of the given color code."""
+        r, g, b = [int(color_code[i:i+2], 16) for i in (1, 3, 5)]
+        r, g, b = [int(c * factor) for c in (r, g, b)]
+        return "#{:02x}{:02x}{:02x}".format(r, g, b)
+
+    darkened_colors = [darken_color(color) for color in colors]
+
+    from pathlib import Path
+    import matplotlib as mpl
+
+    # NOTE: to get the same font that is used in the LaTeX template of my thesis, I downloaded the font manually (there did not seem to be a better way)
+    font_file = os.path.join(base_dir, 'palatino.ttf')
+    if os.path.exists(font_file):
+        fpath = Path(font_file)
+        font_properties = mpl.font_manager.FontProperties(fname=fpath)
+    else:
+        font_properties = mpl.font_manager.FontProperties(family='sans-serif')
+
+    plt.rcParams['lines.linewidth'] = 0.1
+
+    # Read data
+    df = pd.read_csv(input_file)
+
+    # Filter and rename the wasmtime variants
+    match benchmark_suite:
+        case "sorting":
+            keep_variants = {
+                "wasmtime-none": "No MTE",
+                "wasmtime-mte-infra-only": "MTE Infrastructure Only",
+                "wasmtime-mte-no-tagging": "No Tagging Instructions",
+                "wasmtime-mte-naive": "MTE Naive STG",
+                "wasmtime-mte-opt-static": "MTE Optimizations",
+                "wasmtime-mte-async": "MTE Async Mode",
+            }
+        case "pac":
+            keep_variants = {
+                "wasmtime-none": "No PAC",
+                "wasmtime-pac": "PAC Enabled"
+            }
+        case "memory-tagging":
+            keep_variants = {
+                "wasmtime-mte-naive": "Naive STG",
+                "wasmtime-mte-opt": "Optimized ST2G",
+                "wasmtime-mte-opt-static": "Optimized ST2G + Loop Unrolling Threshold",
+            }
+        case "polybench":
+            keep_variants = {
+                "wasmtime-none": "No MTE",
+                "wasmtime-mte-infra-only": "MTE Infrastructure Only",
+                "wasmtime-mte-no-tagging": "No Tagging Instructions",
+                "wasmtime-mte-naive": "MTE Naive STG",
+                "wasmtime-mte-opt-static": "MTE Optimizations",
+                "wasmtime-mte-async": "MTE Async Mode",
+            }
+
+    df = df[df["wasmtime-variant"].isin(keep_variants.keys())]
+    df["wasmtime-variant"] = df["wasmtime-variant"].map(keep_variants)
"tagging-many-loops-large-segments_mean", "tagging-many-loops-large-segments_stdev"), + ("1600, 4000000", "tagging-few-loops-large-segments_mean", "tagging-few-loops-large-segments_stdev"), + ("4000000, 1600", "tagging-many-loops-small-segments_mean", "tagging-many-loops-small-segments_stdev"), + ] + case "polybench": + algorithms = [ + ("correlation", "correlation-san_mean", "correlation-san_stdev"), + ("2mm", "2mm-san_mean", "2mm-san_stdev"), + ("deriche", "deriche-san_mean", "deriche-san_stdev"), + ("adi", "adi-san_mean", "adi-san_stdev") + ] + + + bar_width = 0.1 + r = np.arange(len(algorithms)) + gap = 0.02 + + fig_width_in_inches = 6 + fig_height_in_inches = 4 + + fig, ax = plt.subplots(figsize=(fig_width_in_inches, fig_height_in_inches)) + + + match benchmark_suite: + case "sorting": + normalized_value = df.loc[df["wasmtime-variant"] == "No MTE", mean].values[0] + case "pac": + normalized_value = df.loc[df["wasmtime-variant"] == "No PAC", mean].values[0] + case "memory-tagging": + normalized_value = df.loc[df["wasmtime-variant"] == "Naive STG", mean].values[0] + case "polybench": + normalized_value = df.loc[df["wasmtime-variant"] == "No MTE", mean].values[0] + + + for idx, (alg_name, mean, stdev) in enumerate(algorithms): + offset = -(len(keep_variants) - 1) * (bar_width + gap) / 2 + for v_idx, variant in enumerate(keep_variants.values()): + y = df.loc[df["wasmtime-variant"] == + variant, mean].values[0] / normalized_value + y_err = (df.loc[df["wasmtime-variant"] == variant, + stdev].values[0] / normalized_value) if normalized_value != 0 else 0 + position = r[idx] + offset + v_idx * (bar_width + gap) + color_idx = v_idx % len(colors) + plt.bar(position, y, yerr=y_err, capsize=4, alpha=1, width=bar_width, + edgecolor=darkened_colors[color_idx], color=colors[color_idx], + label=variant if idx == 0 else "", error_kw=dict(lw=1, capthick=0.5)) + + + match benchmark_suite: + case "sorting": + x_axis_label = 'Sorting Algorithms' + case "pac": + x_axis_label = 'Size of Array' + case "memory-tagging": + x_axis_label = 'Loop Iterations, Size of Tagged Segments (in Bytes)' + case "polybench": + x_axis_label = None + + if benchmark_suite is not None: + ax.set_xlabel(x_axis_label, fontweight='bold', fontproperties=font_properties) + + ax.set_ylabel('Normalized Runtime', fontweight='bold', + fontproperties=font_properties) + + # NOTE: Whether the y axis should be log scale depends heavily on the results, but for some for now it makes more sense + match benchmark_suite: + case "sorting" | "polybench": + ax.set_yscale('log') + + ax.set_xticks(r) + ax.set_xticklabels( + [alg_name for alg_name, _, _ in algorithms], fontproperties=font_properties) + for label in ax.get_yticklabels(): + label.set_fontproperties(font_properties) + + ax.grid(which='both', axis='y', linestyle='-', linewidth=0.7, alpha=0.6) + ax.set_axisbelow(True) + + # NOTE: you can adjust the number of horizontal columns here manually (if there are too many variants to fit on one line) + plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=False, + shadow=False, prop=font_properties) + # plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=False, + # shadow=False, prop=font_properties, ncol=len(keep_variants)) + + plt.tight_layout() + plt.savefig(output_image_path, format='pdf') + plt.show() + + +def generate_all_charts(): + final_data_csv = os.path.join(all_final_results_dir, 'cpu-cycles.csv') + output_image = os.path.join(chart_results_dir, 'cpu-cycles.pdf') + generate_cpu_cycles_chart(final_data_csv, 
+
+
+if __name__ == "__main__":
+    main()
diff --git a/demo/benchmarks/code/memory-tagging/Makefile b/demo/benchmarks/code/memory-tagging/Makefile
new file mode 100644
index 00000000000000..64cf8fb96b2575
--- /dev/null
+++ b/demo/benchmarks/code/memory-tagging/Makefile
@@ -0,0 +1,28 @@
+CC=/scratch/fritz/src/safe-wasm/llvm-project/build/bin/clang
+WASM_FLAGS=--target=wasm64-unknown-wasi --sysroot /scratch/martin/src/wasm/wasi-libc/sysroot -g -D_WASI_EMULATED_PROCESS_CLOCKS -lwasi-emulated-process-clocks -Wl,--stack-first -Wl,--initial-memory=104857600 -Wl,--max-memory=104857600 -Wl,-z,stack-size=83886080
+SAN_FLAGS=-march=wasm64-wasi+mem-safety -fsanitize=wasm-memsafety
+CFLAGS=-O0 ${WASM_FLAGS}
+# CFLAGS=-O2 ${WASM_FLAGS}
+BUILD_DIR=build
+
+FILES=tagging-few-loops-large-segments \
+	tagging-few-loops-small-segments \
+	tagging-many-loops-large-segments \
+	tagging-many-loops-small-segments
+
+TARGETS=$(addprefix ${BUILD_DIR}/, $(addsuffix .wasm, ${FILES}))
+
+all: ${TARGETS}
+
+${BUILD_DIR}/%.wasm: %.ll
+	${CC} -o $@ $< ${CFLAGS} ${EXTRA_FLAGS} ${SAN_FLAGS}
+
+clean:
+	@ rm -f ${BUILD_DIR}/*.wasm
+
+${BUILD_DIR}:
+	mkdir -p $@
+
+# Add the directory as an order-only prerequisite to ensure it is created before compilation
+${BUILD_DIR}/%.wasm: | ${BUILD_DIR}
+
diff --git a/demo/benchmarks/code/memory-tagging/tagging-few-loops-large-segments.ll b/demo/benchmarks/code/memory-tagging/tagging-few-loops-large-segments.ll
new file mode 100644
index 00000000000000..bab5bdccc2f6aa
--- /dev/null
+++ b/demo/benchmarks/code/memory-tagging/tagging-few-loops-large-segments.ll
@@ -0,0 +1,29 @@
+; File: tagging-few-loops-large-segments.ll
+; 1,600 loop iterations; 4,000,000-byte tagged segments
+
+declare ptr @llvm.wasm.segment.stack.new(ptr, i64)
+declare void @llvm.wasm.segment.stack.free(ptr, ptr, i64)
+
+define i32 @__main_void() {
+entry:
+  %static_size_array = alloca [1000000 x i32], align 16
+  %iteration_count = alloca i32
+  store i32 1600, i32* %iteration_count
+
+  br label %loop_start
+
+loop_start:
+  %count = load i32, i32* %iteration_count
+  %is_done = icmp eq i32 %count, 0
+  br i1 %is_done, label %loop_end, label %loop_body
+
+loop_body:
+  %segment_ptr = call ptr @llvm.wasm.segment.stack.new(ptr %static_size_array, i64 4000000)
+  call void @llvm.wasm.segment.stack.free(ptr %segment_ptr, ptr %static_size_array, i64 4000000)
+
+  %new_count = sub i32 %count, 1
+  store i32 %new_count, i32* %iteration_count
+  br label %loop_start
+
+loop_end:
+  ret i32 0
+}
diff --git a/demo/benchmarks/code/memory-tagging/tagging-few-loops-small-segments.ll b/demo/benchmarks/code/memory-tagging/tagging-few-loops-small-segments.ll
new file mode 100644
index 00000000000000..5bf3a741f7094d
--- /dev/null
+++ b/demo/benchmarks/code/memory-tagging/tagging-few-loops-small-segments.ll
@@ -0,0 +1,29 @@
+; File: tagging-few-loops-small-segments.ll
+; 1,600 loop iterations; 1,600-byte tagged segments
+
+declare ptr @llvm.wasm.segment.stack.new(ptr, i64)
+declare void @llvm.wasm.segment.stack.free(ptr, ptr, i64)
+
+define i32 @__main_void() {
+entry:
+  %static_size_array = alloca [400 x i32], align 16
+  %iteration_count = alloca i32
+  store i32 1600, i32* %iteration_count
+
+  br label %loop_start
+
+loop_start:
+  %count = load i32, i32* %iteration_count
+  %is_done = icmp eq i32 %count, 0
+  br i1 %is_done, label %loop_end, label %loop_body
+
+loop_body:
+  %segment_ptr = call ptr @llvm.wasm.segment.stack.new(ptr %static_size_array, i64 1600)
+  call void @llvm.wasm.segment.stack.free(ptr %segment_ptr, ptr %static_size_array, i64 1600)
+
+  %new_count = sub i32 %count, 1
+  store i32 %new_count, i32* %iteration_count
+  br label %loop_start
+
+loop_end:
+  ret i32 0
+}
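+
+; Note on the benchmark matrix (derived from the four files; see also the
+; chart labels in benchmarks-script.py): the tagging-* benchmarks vary only
+; the loop iteration count and the tagged segment size.
+; tagging-few-loops-large-segments (1,600 x 4,000,000 B) and
+; tagging-many-loops-small-segments (4,000,000 x 1,600 B) tag the same total
+; number of bytes, which helps separate the per-call overhead of
+; segment.stack.new/free from the per-byte tagging cost.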
diff --git a/demo/benchmarks/code/memory-tagging/tagging-many-loops-large-segments.ll b/demo/benchmarks/code/memory-tagging/tagging-many-loops-large-segments.ll
new file mode 100644
index 00000000000000..a49acaaed1c96c
--- /dev/null
+++ b/demo/benchmarks/code/memory-tagging/tagging-many-loops-large-segments.ll
@@ -0,0 +1,29 @@
+; File: tagging-many-loops-large-segments.ll
+; 40,000 loop iterations; 40,000-byte tagged segments
+
+declare ptr @llvm.wasm.segment.stack.new(ptr, i64)
+declare void @llvm.wasm.segment.stack.free(ptr, ptr, i64)
+
+define i32 @__main_void() {
+entry:
+  %static_size_array = alloca [10000 x i32], align 16
+  %iteration_count = alloca i32
+  store i32 40000, i32* %iteration_count
+
+  br label %loop_start
+
+loop_start:
+  %count = load i32, i32* %iteration_count
+  %is_done = icmp eq i32 %count, 0
+  br i1 %is_done, label %loop_end, label %loop_body
+
+loop_body:
+  %segment_ptr = call ptr @llvm.wasm.segment.stack.new(ptr %static_size_array, i64 40000)
+  call void @llvm.wasm.segment.stack.free(ptr %segment_ptr, ptr %static_size_array, i64 40000)
+
+  %new_count = sub i32 %count, 1
+  store i32 %new_count, i32* %iteration_count
+  br label %loop_start
+
+loop_end:
+  ret i32 0
+}
diff --git a/demo/benchmarks/code/memory-tagging/tagging-many-loops-small-segments.ll b/demo/benchmarks/code/memory-tagging/tagging-many-loops-small-segments.ll
new file mode 100644
index 00000000000000..551c329184ddb9
--- /dev/null
+++ b/demo/benchmarks/code/memory-tagging/tagging-many-loops-small-segments.ll
@@ -0,0 +1,29 @@
+; File: tagging-many-loops-small-segments.ll
+; 4,000,000 loop iterations; 1,600-byte tagged segments
+
+declare ptr @llvm.wasm.segment.stack.new(ptr, i64)
+declare void @llvm.wasm.segment.stack.free(ptr, ptr, i64)
+
+define i32 @__main_void() {
+entry:
+  %static_size_array = alloca [400 x i32], align 16
+  %iteration_count = alloca i32
+  store i32 4000000, i32* %iteration_count
+
+  br label %loop_start
+
+loop_start:
+  %count = load i32, i32* %iteration_count
+  %is_done = icmp eq i32 %count, 0
+  br i1 %is_done, label %loop_end, label %loop_body
+
+loop_body:
+  %segment_ptr = call ptr @llvm.wasm.segment.stack.new(ptr %static_size_array, i64 1600)
+  call void @llvm.wasm.segment.stack.free(ptr %segment_ptr, ptr %static_size_array, i64 1600)
+
+  %new_count = sub i32 %count, 1
+  store i32 %new_count, i32* %iteration_count
+  br label %loop_start
+
+loop_end:
+  ret i32 0
+}
diff --git a/demo/benchmarks/code/pac-store-load-loops/Makefile b/demo/benchmarks/code/pac-store-load-loops/Makefile
new file mode 100644
index 00000000000000..c59356acd398f6
--- /dev/null
+++ b/demo/benchmarks/code/pac-store-load-loops/Makefile
@@ -0,0 +1,24 @@
+CC=/scratch/fritz/src/safe-wasm/llvm-project/build/bin/clang
+# WASM_FLAGS=--target=wasm64-unknown-wasi --sysroot /scratch/martin/src/wasm/wasi-libc/sysroot -g -D_WASI_EMULATED_PROCESS_CLOCKS -lwasi-emulated-process-clocks -Wl,--stack-first -Wl,--initial-memory=104857600 -Wl,--max-memory=104857600 -Wl,-z,stack-size=83886080
+WASM_FLAGS=--target=wasm64-unknown-wasi --sysroot /scratch/martin/src/wasm/wasi-libc/sysroot -g -D_WASI_EMULATED_PROCESS_CLOCKS -lwasi-emulated-process-clocks -Wl,--stack-first -Wl,--initial-memory=1677721600 -Wl,--max-memory=1677721600 -Wl,-z,stack-size=1342177280
+SAN_FLAGS=-march=wasm64-wasi+mem-safety -fsanitize=wasm-memsafety
+#CFLAGS=-O0 ${WASM_FLAGS}
+CFLAGS=-O2 ${WASM_FLAGS}
+BUILD_DIR=build
+
+PAC_SOURCES=$(wildcard pac-*.c)
+PAC_WASMS=$(PAC_SOURCES:%.c=${BUILD_DIR}/%.wasm)
+
+all: ${PAC_WASMS}
+
+${BUILD_DIR}/%.wasm: %.c
+	${CC} -o $@ $< ${CFLAGS} ${EXTRA_FLAGS} ${SAN_FLAGS}
+
+clean:
+	@ rm -f ${BUILD_DIR}/*.wasm
+
+${BUILD_DIR}:
+	mkdir -p $@
+
+# Add the directory as an order-only prerequisite to ensure it is created before compilation
+${BUILD_DIR}/%.wasm: | ${BUILD_DIR}
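+
+# The pac-N.c benchmarks store and then load N pointers (N = 10^4 up to 10^8).
+# With -fsanitize=wasm-memsafety, each pointer store/load is expected to be
+# signed/authenticated via i64.pointer_sign / i64.pointer_auth (these are the
+# instructions counted by benchmarks-script.py --count-insts).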
diff --git a/demo/benchmarks/code/pac-store-load-loops/pac-1.c b/demo/benchmarks/code/pac-store-load-loops/pac-1.c
new file mode 100644
index 00000000000000..b32f2e389d7d87
--- /dev/null
+++ b/demo/benchmarks/code/pac-store-load-loops/pac-1.c
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+    size_t n = 10000;
+
+    void* ptrArray[n];
+
+    // Store n pointers in the array
+    for (size_t i = 0; i < n; i++) {
+        ptrArray[i] = (void*) i; // casting the iterating variable to a pointer
+    }
+
+    // Load the n pointers from the array and accumulate their values
+    size_t sum = 0;
+    for (size_t i = 0; i < n; i++) {
+        sum += (size_t) ptrArray[i];
+    }
+
+    return sum % 125; // modulo to make sure it's a valid return code
+}
diff --git a/demo/benchmarks/code/pac-store-load-loops/pac-2.c b/demo/benchmarks/code/pac-store-load-loops/pac-2.c
new file mode 100644
index 00000000000000..55a9ba9b3d9f55
--- /dev/null
+++ b/demo/benchmarks/code/pac-store-load-loops/pac-2.c
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+    size_t n = 100000;
+
+    void* ptrArray[n];
+
+    // Store n pointers in the array
+    for (size_t i = 0; i < n; i++) {
+        ptrArray[i] = (void*) i; // casting the iterating variable to a pointer
+    }
+
+    // Load the n pointers from the array and accumulate their values
+    size_t sum = 0;
+    for (size_t i = 0; i < n; i++) {
+        sum += (size_t) ptrArray[i];
+    }
+
+    return sum % 125; // modulo to make sure it's a valid return code
+}
diff --git a/demo/benchmarks/code/pac-store-load-loops/pac-3.c b/demo/benchmarks/code/pac-store-load-loops/pac-3.c
new file mode 100644
index 00000000000000..74b960236e1d15
--- /dev/null
+++ b/demo/benchmarks/code/pac-store-load-loops/pac-3.c
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+    size_t n = 1000000;
+
+    void* ptrArray[n];
+
+    // Store n pointers in the array
+    for (size_t i = 0; i < n; i++) {
+        ptrArray[i] = (void*) i; // casting the iterating variable to a pointer
+    }
+
+    // Load the n pointers from the array and accumulate their values
+    size_t sum = 0;
+    for (size_t i = 0; i < n; i++) {
+        sum += (size_t) ptrArray[i];
+    }
+
+    return sum % 125; // modulo to make sure it's a valid return code
+}
diff --git a/demo/benchmarks/code/pac-store-load-loops/pac-4.c b/demo/benchmarks/code/pac-store-load-loops/pac-4.c
new file mode 100644
index 00000000000000..a49c86bdcd4bfb
--- /dev/null
+++ b/demo/benchmarks/code/pac-store-load-loops/pac-4.c
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+    size_t n = 10000000;
+
+    void* ptrArray[n];
+
+    // Store n pointers in the array
+    for (size_t i = 0; i < n; i++) {
+        ptrArray[i] = (void*) i; // casting the iterating variable to a pointer
+    }
+
+    // Load the n pointers from the array and accumulate their values
+    size_t sum = 0;
+    for (size_t i = 0; i < n; i++) {
+        sum += (size_t) ptrArray[i];
+    }
+
+    return sum % 125; // modulo to make sure it's a valid return code
+}
diff --git a/demo/benchmarks/code/pac-store-load-loops/pac-5.c b/demo/benchmarks/code/pac-store-load-loops/pac-5.c
new file mode 100644
index 00000000000000..a4790ce6592f85
--- /dev/null
+++ b/demo/benchmarks/code/pac-store-load-loops/pac-5.c
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+int main(int argc, char **argv) {
+    size_t n = 100000000;
+
+    void* ptrArray[n];
+
+    // Store n pointers in the array
+    for (size_t i = 0; i < n; i++) {
+        ptrArray[i] = (void*) i; // casting the iterating variable to a pointer
+    }
+
+    // Load the n pointers from the array and accumulate their values
+    size_t sum = 0;
+    for (size_t i = 0; i < n; i++) {
+        sum += (size_t) ptrArray[i];
+    }
+
+    return sum % 125; // modulo to make sure it's a valid return code
+}
diff --git a/demo/benchmarks/code/sorting/Makefile b/demo/benchmarks/code/sorting/Makefile
new file mode 100644
index 00000000000000..470a5811a4351d
--- /dev/null
+++ b/demo/benchmarks/code/sorting/Makefile
@@ -0,0 +1,25 @@
+CC=/scratch/fritz/src/safe-wasm/llvm-project/build/bin/clang
+WASM_FLAGS=--target=wasm64-unknown-wasi --sysroot /scratch/martin/src/wasm/wasi-libc/sysroot -g -D_WASI_EMULATED_PROCESS_CLOCKS -lwasi-emulated-process-clocks /scratch/fritz/src/safe-wasm/llvm-project/wasm_memsafety_rtlib.c
+SAN_FLAGS=-march=wasm64-wasi+mem-safety -fsanitize=wasm-memsafety
+CFLAGS=-O2 ${WASM_FLAGS}
+BUILD_DIR=build
+
+all: ${BUILD_DIR}/bubble_sort.wasm ${BUILD_DIR}/merge_sort.wasm ${BUILD_DIR}/modified_merge_sort.wasm
+
+${BUILD_DIR}/bubble_sort.wasm: bubble_sort.c
+	${CC} -o $@ $< ${CFLAGS} ${EXTRA_FLAGS} ${SAN_FLAGS}
+
+${BUILD_DIR}/merge_sort.wasm: merge_sort.c
+	${CC} -o $@ $< ${CFLAGS} ${EXTRA_FLAGS} ${SAN_FLAGS}
+
+${BUILD_DIR}/modified_merge_sort.wasm: modified_merge_sort.c
+	${CC} -o $@ $< ${CFLAGS} ${EXTRA_FLAGS} ${SAN_FLAGS}
+
+clean:
+	@ rm -f ${BUILD_DIR}/bubble_sort.wasm ${BUILD_DIR}/merge_sort.wasm ${BUILD_DIR}/modified_merge_sort.wasm
+
+${BUILD_DIR}:
+	mkdir -p $@
+
+# Add the directory as an order-only prerequisite to ensure it is created before compilation
+${BUILD_DIR}/%.wasm: | ${BUILD_DIR}
diff --git a/demo/benchmarks/code/sorting/bubble_sort.c b/demo/benchmarks/code/sorting/bubble_sort.c
new file mode 100644
index 00000000000000..b73011972eb3d6
--- /dev/null
+++ b/demo/benchmarks/code/sorting/bubble_sort.c
@@ -0,0 +1,53 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+// TODO: use size_t over int
+
+void bubble_sort(int* arr, int n) {
+    for (int i = 0; i < n-1; i++) {
+        for (int j = 0; j < n-i-1; j++) {
+            if (arr[j] > arr[j+1]) {
+                int temp = arr[j];
+                arr[j] = arr[j+1];
+                arr[j+1] = temp;
+            }
+        }
+    }
+}
+
+int assert_sorted(int* arr, int n) {
+    for (int i = 0; i < n-1; i++) {
+        if (arr[i] > arr[i+1]) {
+            return 0; // Not sorted
+        }
+    }
+    return 1; // Sorted
+}
+
+int main(int argc, char* argv[]) {
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s <array_length>\n", argv[0]);
+        return 1;
+    }
+
+    int n = atoi(argv[1]);
+    int* arr = (int*)malloc(n * sizeof(int));
+
+    // Read unsorted input array from stdin
+    for (int i = 0; i < n; i++) {
+        scanf("%d", &arr[i]);
+    }
+
+    // Invoke SUT
+    bubble_sort(arr, n);
+
+    if (!assert_sorted(arr, n)) {
+        fprintf(stderr, "Array is not sorted!\n");
+        free(arr);
+        return 1; // Error
+    }
+
+    free(arr);
+
+    return 0;
+}
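+
+// Note: benchmarks-script.py feeds a reversed 40,000-element array on stdin,
+// so this bubble sort hits its worst case (O(n^2) comparisons and swaps),
+// while the merge sort variants stay O(n log n); the three sorters thus
+// exercise the instrumented memory accesses under different access patterns.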
diff --git a/demo/benchmarks/code/sorting/merge_sort.c b/demo/benchmarks/code/sorting/merge_sort.c
new file mode 100644
index 00000000000000..929c3452c77553
--- /dev/null
+++ b/demo/benchmarks/code/sorting/merge_sort.c
@@ -0,0 +1,114 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+// TODO: use size_t over int
+
+void merge(int* arr, int l, int m, int r) {
+    int L_size = m - l + 1;
+    int R_size = r - m;
+
+    // Create temporary arrays on the heap
+    int* L = (int*)malloc(L_size * sizeof(int));
+    int* R = (int*)malloc(R_size * sizeof(int));
+
+    if (!L || !R) {
+        fprintf(stderr, "Memory allocation failed\n");
+        exit(1);
+    }
+
+    // Copy data to temp arrays L[] and R[]
+    for (int i = 0; i < L_size; i++)
+        L[i] = arr[l + i];
+    for (int j = 0; j < R_size; j++)
+        R[j] = arr[m + 1 + j];
+
+    // Merge the temp arrays back into arr[l..r]
+    int i = 0;
+    int j = 0;
+    int k = l;
+    while (i < L_size && j < R_size) {
+        if (L[i] <= R[j]) {
+            arr[k] = L[i];
+            i++;
+        } else {
+            arr[k] = R[j];
+            j++;
+        }
+        k++;
+    }
+
+    // Copy the remaining elements of L[], if there are any
+    while (i < L_size) {
+        arr[k] = L[i];
+        i++;
+        k++;
+    }
+
+    // Copy the remaining elements of R[], if there are any
+    while (j < R_size) {
+        arr[k] = R[j];
+        j++;
+        k++;
+    }
+
+    free(L);
+    free(R);
+}
+
+void merge_sort(int* arr, int l, int r) {
+    if (l < r) {
+        int m = l + (r - l) / 2;
+        merge_sort(arr, l, m);
+        merge_sort(arr, m + 1, r);
+        merge(arr, l, m, r);
+    }
+}
+
+void bubble_sort(int* arr, int n) {
+    for (int i = 0; i < n-1; i++) {
+        for (int j = 0; j < n-i-1; j++) {
+            if (arr[j] > arr[j+1]) {
+                int temp = arr[j];
+                arr[j] = arr[j+1];
+                arr[j+1] = temp;
+            }
+        }
+    }
+}
+
+int assert_sorted(int* arr, int n) {
+    for (int i = 0; i < n-1; i++) {
+        if (arr[i] > arr[i+1]) {
+            return 0; // Not sorted
+        }
+    }
+    return 1; // Sorted
+}
+
+int main(int argc, char* argv[]) {
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s <array_length>\n", argv[0]);
+        return 1;
+    }
+
+    int n = atoi(argv[1]);
+    int* arr = (int*)malloc(n * sizeof(int));
+
+    // Read unsorted input array from stdin
+    for (int i = 0; i < n; i++) {
+        scanf("%d", &arr[i]);
+    }
+
+    // Invoke SUT
+    merge_sort(arr, 0, n - 1);
+
+    if (!assert_sorted(arr, n)) {
+        fprintf(stderr, "Array is not sorted!\n");
+        free(arr);
+        return 1; // Error
+    }
+
+    free(arr);
+
+    return 0;
+}
0; i < n; i++) { + scanf("%d", &arr[i]); + } + + // Invoke SUT + modified_merge_sort(arr, 0, n - 1); + + if (!assert_sorted(arr, n)) { + fprintf(stderr, "Array is not sorted!\n"); + free(arr); + return 1; // Error + } + + free(arr); + + return 0; +} +