-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9f2e4f7
commit d8ef9d4
Showing
37 changed files
with
447 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/bin/bash
# Prepare every benchmark (dependencies, then inputs), run the dynamic
# analysis on each one, and finally drop a marker file under
# infrastructure/target.

REPO_TOP="$(git rev-parse --show-toplevel)"

# One benchmark name per whitespace-separated word, sorted.
benches=$(python3 "$REPO_TOP/infrastructure/all_scripts.py" | sort)

# NOTE: $benches is intentionally left unquoted in the loops below so that
# the shell splits it into one word per benchmark. Every path built from it
# is quoted so a checkout location containing spaces still works.

for bench in $benches; do
    bash "$REPO_TOP/$bench/deps.sh"
done

for bench in $benches; do
    bash "$REPO_TOP/$bench/input.sh"
done

for bench in $benches; do
    python3 "$REPO_TOP/infrastructure/run_dynamic.py" "$bench"
done

touch "$REPO_TOP/infrastructure/target/collect_dynamic_logs.touch"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import lzma | ||
from collections import defaultdict | ||
from pathlib import Path | ||
import json | ||
import math | ||
|
||
from project_root import get_project_root | ||
|
||
def correct_base(path):
    """Return True when ``path`` is ``/benchmarks`` or lies below it."""
    try:
        Path(path).relative_to('/benchmarks')
    except ValueError:
        # relative_to() refuses paths that are not under the given base.
        return False
    return True
|
||
def rebase(path):
    """Return ``path`` expressed relative to ``/benchmarks``.

    Raises ValueError (from ``Path.relative_to``) when ``path`` is not
    located under ``/benchmarks``.
    """
    absolute = Path(path)
    return absolute.relative_to('/benchmarks')
|
||
def readings_dict(readings):
    """Index ``readings`` by their 'log_current_time' value.

    When two readings share a timestamp, the later one in ``readings`` wins.
    """
    by_time = {}
    for reading in readings:
        by_time[reading['log_current_time']] = reading
    return by_time
|
||
def is_shell(cmd):
    """Return True when the command line ``cmd`` looks like a bash invocation.

    ``cmd`` is a sequence of arguments (or None); only its first element is
    inspected, and it counts as a shell when it contains the text 'bash'.
    """
    # Disabled stricter variant, kept for reference:
    # non_shell = {'cat', 'tr', 'grep', 'sort', 'uniq', 'cut', 'awk', 'sed', 'rev', 'wc', 'convert', 'ffmpeg'}
    # is_shell_names = {'/bin/bash'}
    # assert cmd in non_shell | is_shell_names, f"unknown whether {cmd} is a shell"
    # return cmd in is_shell_names
    if cmd is None or len(cmd) == 0:
        return False
    return 'bash' in cmd[0]
|
||
def sum_counters(pid, at_time, processes, children, should_include, which_counter):
    """Sum per-key counters over the process tree rooted at ``pid``.

    Args:
        pid: root of the tree to walk.
        at_time: timestamp key selecting one reading per process.
        processes: mapping pid -> {timestamp: record}; a value of None
            (pid without readings) is skipped.
        children: mapping pid -> iterable of child pids.
        should_include: predicate on a record; records it rejects contribute
            nothing, but their descendants are still visited.
        which_counter: function mapping a record to a dict of numeric
            counters to accumulate.

    Returns:
        A ``defaultdict(int)`` mapping counter name to the summed value.
    """
    totals = defaultdict(int)
    visited = set()
    pending = [pid]
    while pending:
        current = pending.pop()
        if current in visited:
            # Guard against a cyclic parent/child map (could arise e.g. if a
            # pid is reused); plain recursion would never terminate here.
            continue
        visited.add(current)
        readings = processes[current]
        if readings is not None and at_time in readings:
            record = readings[at_time]
            if should_include(record):
                for key, value in which_counter(record).items():
                    totals[key] += value
        # Push in reverse so pop() yields children in their iteration order,
        # matching a depth-first, pre-order recursive walk.
        pending.extend(reversed(list(children[current])))
    return totals
|
||
def input_files(pid, at_time, processes, children):
    """Collect the input files used by the process tree rooted at ``pid``.

    For every process in the tree that has a reading at ``at_time`` this
    gathers the record's 'benchmark_input_file' plus every open file that is
    not opened for writing ('w'), does not have a '.sh' suffix, and lies
    under the benchmarks base directory (see ``correct_base``).

    Args:
        pid: root of the tree to walk.
        at_time: timestamp key selecting one reading per process.
        processes: mapping pid -> {timestamp: record}; a value of None
            (pid without readings) is skipped.
        children: mapping pid -> iterable of child pids.

    Returns:
        A set of file paths; ``None`` entries are removed.

    Raises:
        AssertionError: if an open file reports a mode other than
            'r', 'r+' or 'w'.
    """
    known_modes = {'r', 'r+', 'w'}
    found = set()
    visited = set()
    pending = [pid]
    while pending:
        current = pending.pop()
        if current in visited:
            # Guard against a cyclic parent/child map (could arise e.g. if a
            # pid is reused); plain recursion would never terminate here.
            continue
        visited.add(current)
        readings = processes[current]
        if readings is not None and at_time in readings:
            record = readings[at_time]
            found.add(record['benchmark_input_file'])
            # Each open-file entry has five fields; only the path (first)
            # and the mode (fourth) are used.
            for file_path, _, _, mode, _ in record['full']['open_files']:
                assert mode in known_modes, f"unknown mode {mode}"

                is_a_script = '.sh' in Path(file_path).suffixes
                if mode != 'w' and not is_a_script and correct_base(file_path):
                    found.add(file_path)
        pending.extend(children[current])
    # Processes without a benchmark input file contribute None.
    found.discard(None)
    return found
|
||
def read_log_file(path):
    """Load an xz-compressed JSON-lines process log.

    Each line is a JSON object with at least 'pid', 'parent' and
    'log_current_time'.

    Returns:
        A ``(processes, parents, children)`` tuple where
        * ``processes`` maps pid -> {log_current_time: record} and yields
          None for unknown pids,
        * ``parents`` maps pid -> parent pid and yields None for unknown pids,
        * ``children`` maps parent pid -> set of child pids.
    """
    readings_by_pid = defaultdict(list)
    parents = defaultdict(lambda: None)
    children = defaultdict(set)
    with lzma.open(path, 'r') as log:
        for raw_line in log:
            entry = json.loads(raw_line)
            pid, parent = entry['pid'], entry['parent']
            readings_by_pid[pid].append(entry)
            children[parent].add(pid)
            parents[pid] = parent

    # Re-key each pid's readings by timestamp (later duplicates win).
    processes = defaultdict(lambda: None)
    for pid, readings in readings_by_pid.items():
        processes[pid] = {r['log_current_time']: r for r in readings}
    return processes, parents, children
|
||
def print_statistics(pid, processes, parents, children):
    """Print one comma-separated statistics row for the tree rooted at ``pid``.

    Columns, in order:
        1. benchmark script relative to /benchmarks (or None),
        2. 'children_user' cpu time of ``pid`` at its last reading,
        3. 'children_system' cpu time of ``pid`` at its last reading,
        4. maximum, over all readings of ``pid``, of the summed 'uss' memory
           of the whole tree,
        5. 'read_chars' and 6. 'write_chars' of ``pid`` at its last reading,
        7. summed 'user' and 8. summed 'system' cpu time of the shell
           processes in the tree at the last reading,
        9. ';'-joined input files of the tree, relative to /benchmarks.

    Args:
        pid: root process of the tree.
        processes: mapping pid -> {timestamp: record}.
        parents: not used by this function.
        children: mapping pid -> set of child pids.
    """
    rs = processes[pid]

    max_uss = max(
        sum_counters(
            pid, log_time, processes, children,
            lambda record: True,
            lambda record: record['pfullmem'],
        )['uss']
        for log_time in rs
    )

    all_input_files = set()
    for log_time in rs:
        all_input_files |= input_files(pid, log_time, processes, children)
    # Sorted so the column is reproducible; set iteration order is not.
    all_input_files = ";".join(sorted(str(rebase(p)) for p in all_input_files))

    # The largest timestamp key is taken as the last reading.
    max_reading = max(rs)
    last = rs[max_reading]
    tis = sum_counters(
        pid, max_reading, processes, children,
        lambda record: is_shell(record['cmdline']),
        lambda record: record['cpu_times'],
    )
    user = last['cpu_times']['children_user']
    system = last['cpu_times']['children_system']
    read_chars = last['io_counters']['read_chars']
    write_chars = last['io_counters']['write_chars']
    benchmark_script = last['benchmark_script']
    benchmark_script = None if benchmark_script is None else rebase(benchmark_script)
    print(benchmark_script, user, system, max_uss, read_chars, write_chars, tis['user'], tis['system'], all_input_files, sep=',')
|
||
if __name__ == '__main__':
    # Walk every compressed process log and print one statistics row per
    # top-level process found in it.
    log_dir = get_project_root() / 'infrastructure' / 'target' / 'process-logs'
    for log_path in log_dir.glob('*.jsonl.xz'):
        processes, parents, children = read_log_file(log_path)
        # A process is top-level when its parent has no recorded parent.
        roots = []
        for candidate in processes:
            if parents[parents[candidate]] is None:
                roots.append(candidate)
        for root_pid in roots:
            print_statistics(root_pid, processes, parents, children)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import lzma | ||
import tempfile | ||
import argparse | ||
from pathlib import Path | ||
from typing import Optional | ||
import json | ||
from subprocess import check_output, run | ||
from collections import Counter | ||
import os | ||
from datetime import datetime, timezone | ||
|
||
|
||
from all_scripts import get_all_scripts | ||
from syntax_analysis import parse_shell_script, count_nodes | ||
from project_root import get_project_root | ||
|
||
def get_parser():
    """Build the command-line parser: a single positional ``bench`` string."""
    cli = argparse.ArgumentParser(prog='run_dynamic', description='runs the dynamic analysis')
    cli.add_argument('bench', type=str)
    return cli
|
||
def get_environment(root: Path, start_time: str, bench: str, data_log: str):
    """Return a copy of the current environment extended for a benchmark run.

    The copy additionally carries:
        BENCHMARK_SHELL: path of infrastructure/run_dynamic_shell.py,
        BENCHMARK_EXPERIMENT_START: ``start_time``,
        BENCHMARK_PROCESS_LOG: ``data_log``,
        BENCHMARK_MORTEM_LOG: the '<start_time>-<bench>.mortem' file under
            infrastructure/target/process-logs.

    ``os.environ`` itself is left untouched.
    """
    infrastructure = root / 'infrastructure'
    mortem_log = infrastructure / 'target' / 'process-logs' / f'{start_time}-{bench}.mortem'
    env = dict(os.environ)
    env.update({
        'BENCHMARK_SHELL': str(infrastructure / 'run_dynamic_shell.py'),
        'BENCHMARK_EXPERIMENT_START': start_time,
        'BENCHMARK_PROCESS_LOG': data_log,
        'BENCHMARK_MORTEM_LOG': str(mortem_log),
    })
    return env
|
||
if __name__ == '__main__':
    # Run one benchmark's run.sh with the instrumented environment, then
    # compress the collected process log into target/process-logs.
    parser = get_parser()
    args = parser.parse_args()
    root = get_project_root()
    bench = args.bench
    start_time = datetime.now(timezone.utc).isoformat()
    process_logs = root / 'infrastructure' / 'target' / 'process-logs'
    # Both the mortem log and the compressed log are written here; make sure
    # the directory exists before anything tries to open a file in it.
    process_logs.mkdir(parents=True, exist_ok=True)
    with tempfile.NamedTemporaryFile() as data_log:
        data_log = data_log.name
        env = get_environment(root=root, start_time=start_time, bench=bench, data_log=data_log)
        # write to an uncompressed file because it is faster
        run([root / bench / 'run.sh'], env=env)
        compressed_data_log = process_logs / f'{start_time}-{bench}.jsonl.xz'
        # xz emits binary data, so the destination is opened in binary mode;
        # check=True surfaces a failed compression instead of silently
        # leaving a truncated log behind.
        with compressed_data_log.open('wb') as stdout:
            run(['xz', '-6e', '-T0', '-c', data_log], stdout=stdout, check=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import os | ||
from datetime import datetime, timezone | ||
from itertools import chain | ||
import time | ||
import psutil | ||
import signal | ||
from pathlib import Path | ||
from typing import Optional | ||
import json | ||
from subprocess import run | ||
import sys | ||
import asyncio | ||
|
||
from all_scripts import get_all_scripts | ||
from syntax_analysis import parse_shell_script, count_nodes | ||
from project_root import get_project_root | ||
|
||
def data_json(p: psutil.Process, log_current_time: str, benchmark_experiment_start: str) -> str:
    """Serialise one snapshot of process ``p`` as a JSON string.

    The snapshot holds the pid, the parent pid (None when unavailable), the
    BENCHMARK_* values from the process environment, the two timestamps
    passed in, cwd, cmdline, creation time, selected cpu/memory/io counters
    by name, the number of file descriptors, and the complete ``as_dict()``
    dump under 'full'.
    """
    try:
        parent_pid = p.parent().pid
    except AttributeError:
        # presumably parent() returned None (no known parent) — TODO confirm
        parent_pid = None

    info = p.as_dict()
    environ = info['environ']
    times = info['cpu_times']
    mem = info['memory_full_info']
    io_counters = info['io_counters']

    cpu_fields = ('user', 'system', 'children_user', 'children_system', 'iowait')
    mem_fields = ('rss', 'vms', 'shared', 'text', 'lib', 'data', 'dirty', 'uss', 'pss', 'swap')
    io_fields = ('read_count', 'write_count', 'read_bytes', 'write_bytes', 'read_chars', 'write_chars')

    snapshot = {
        'pid': info['pid'],
        'parent': parent_pid,
        'benchmark_category': environ.get('BENCHMARK_CATEGORY'),
        'benchmark_script': environ.get('BENCHMARK_SCRIPT'),
        'benchmark_input_file': environ.get('BENCHMARK_INPUT_FILE'),
        'benchmark_experiment_start': benchmark_experiment_start,
        'log_current_time': log_current_time,
        'cwd': info['cwd'],
        'cmdline': info['cmdline'],
        'create_time': info['create_time'],
        'cpu_times': {name: getattr(times, name) for name in cpu_fields},
        'pfullmem': {name: getattr(mem, name) for name in mem_fields},
        'io_counters': {name: getattr(io_counters, name) for name in io_fields},
        'num_fds': info['num_fds'],
        'full': info,  # this does not provide field names for cpu_times and io_counters, etc.
    }
    return json.dumps(snapshot)
|
||
def write_process_data(parent: int, data_log, benchmark_experiment_start):
    """Append one JSON line per process in the tree of pid ``parent``.

    All descendants are written first, then the root itself; every line of
    one call shares the same UTC timestamp. Processes that vanish while
    being inspected are skipped.
    """
    stamp = datetime.now(timezone.utc).isoformat()
    root = psutil.Process(parent)
    snapshot = [*root.children(recursive=True), root]
    for proc in snapshot:
        try:
            line = data_json(proc, stamp, benchmark_experiment_start)
        except psutil.NoSuchProcess:
            continue
        print(line, file=data_log)
|
||
async def collect_process_data(parent: int, data_log, benchmark_experiment_start, interval: float = 0.05):
    """Sample the process tree of ``parent`` until cancelled or until it fails.

    Args:
        parent: pid of the root process to sample.
        data_log: writable text file receiving the JSON lines.
        benchmark_experiment_start: value recorded in every sample.
        interval: seconds to sleep between two samples (default 0.05).

    Any ordinary exception ends the sampling loop and is reported on stderr;
    task cancellation propagates to the caller as usual.
    """
    try:
        while True:
            write_process_data(parent, data_log, benchmark_experiment_start)
            await asyncio.sleep(interval)
    except Exception as e:
        # The monitored program inherits this process's stdout, so
        # diagnostics go to stderr to keep its output clean.
        print(e, type(e), file=sys.stderr)
|
||
async def run_and_collect(program, data_log: Path, mortem_log: Path, benchmark_experiment_start: str):
    """Run ``program`` while sampling its process tree, then log its wall time.

    Args:
        program: argument vector of the program to execute.
        data_log: file the process samples are appended to.
        mortem_log: file that receives one
            '<experiment start>,<pid>,<elapsed seconds>' line.
        benchmark_experiment_start: experiment identifier written to both logs.
    """
    start_time = time.perf_counter()
    process = await asyncio.create_subprocess_exec(*program)
    pid = process.pid
    with data_log.open('a') as stdout:
        process_data = asyncio.create_task(collect_process_data(pid, stdout, benchmark_experiment_start))
        try:
            await process.wait()
            end_time = time.perf_counter()
        finally:
            # Stop the sampler even if waiting failed, and wait for it to
            # finish before the log file is closed underneath it.
            process_data.cancel()
            await asyncio.gather(process_data, return_exceptions=True)
    with mortem_log.open('a') as mortem:
        print(benchmark_experiment_start, pid, end_time - start_time, sep=',', file=mortem)
|
||
async def main():
    """Run the command given on the command line under process monitoring.

    Reads the log locations from BENCHMARK_PROCESS_LOG and
    BENCHMARK_MORTEM_LOG (both required) and the experiment identifier from
    BENCHMARK_EXPERIMENT_START (may be absent).

    Raises:
        KeyError: if a required environment variable is not set.
    """
    program = sys.argv[1:]
    # Index directly so a missing variable is reported by name instead of
    # failing later inside Path(None).
    data_log = Path(os.environ['BENCHMARK_PROCESS_LOG'])
    mortem_log = Path(os.environ['BENCHMARK_MORTEM_LOG'])
    benchmark_experiment_start = os.environ.get('BENCHMARK_EXPERIMENT_START')
    await run_and_collect(
        program=program,
        data_log=data_log,
        mortem_log=mortem_log,
        benchmark_experiment_start=benchmark_experiment_start,
    )
|
||
# Only start the monitor when executed as a script, so that importing this
# module does not launch a program.
if __name__ == '__main__':
    asyncio.run(main())
Empty file.
Oops, something went wrong.