forked from verilator/verilator
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Mariusz Glebocki
committed
Oct 24, 2023
1 parent
b83b997
commit da406e5
Showing
2 changed files
with
137 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
#!/usr/bin/env python3 | ||
import sys | ||
from dataclasses import dataclass, field | ||
from pathlib import Path | ||
|
||
def get_list(fn: Path) -> tuple[int, list[tuple[int, Path]]]:
    """Read a newline-separated list of file paths and measure each file.

    Args:
        fn: Text file with one path per line. Surrounding whitespace is
            stripped from every line; blank lines are skipped.

    Returns:
        A tuple ``(total_size, entries)``. ``entries`` holds one
        ``(size_in_bytes, path)`` pair per listed file, in the order the
        paths appear in the list; ``total_size`` is the sum of those sizes.

    Raises:
        OSError: If the list file cannot be opened or a listed path cannot
            be stat'ed.
    """
    total_size = 0
    result: list[tuple[int, Path]] = []
    with fn.open("r") as list_file:
        for line in list_file:
            name = line.strip()
            if not name:
                # Path("") is ".", so a blank line would otherwise add the
                # current directory (and its size) to the result.
                continue
            path = Path(name)
            size = path.stat().st_size
            total_size += size
            result.append((size, path))
    return (total_size, result)
|
||
|
||
def main():
    """Split a list of files into buckets of similar total size.

    Command-line arguments (read from ``sys.argv``):
        1. Path of a text file listing one input file per line.
        2. Number of buckets (positive integer).
        3. Output name template; every ``%`` is replaced by the bucket id
           (1-based) to form the path of that bucket's list file.

    Files are assigned to buckets in list order. For every bucket, the path
    of its list file is printed to stdout and a size summary to stderr,
    largest bucket first.

    Returns:
        0 on success.

    Raises:
        ValueError: If fewer than three arguments are given, the bucket
            count is not a positive integer, or the template lacks ``%``.
    """
    if len(sys.argv) < 4:
        # Fail with the same exception type as the other argument checks
        # instead of a bare IndexError from sys.argv.
        raise ValueError(
            "Expected 3 arguments: <input list file> <bucket count> "
            "<output name template containing '%'>"
        )

    input_list_file = Path(sys.argv[1])
    buckets_count = int(sys.argv[2])
    if buckets_count <= 0:
        raise ValueError(f"Arg 2: Expected bucket count, got: {sys.argv[2]}")
    output_name_template = sys.argv[3]
    if "%" not in output_name_template:
        raise ValueError(f"Arg 3: template must contain '%': {sys.argv[3]}")

    total_size, input_list = get_list(input_list_file)

    # First estimate: plain average size per bucket.
    ideal_bucket_size = total_size // buckets_count

    # Files exceeding the first estimate are left out of the second one,
    # together with one bucket per such file.
    huge_sizes = [size for size, _ in input_list if size > ideal_bucket_size]
    huge_files_num = len(huge_sizes)
    huge_files_size = sum(huge_sizes)

    ideal_bucket_size = max(1, total_size - huge_files_size) // max(1, buckets_count - huge_files_num)

    @dataclass(slots=True)
    class BucketData:
        bucket_id: int
        size: int = 0
        filenames: list[str] = field(default_factory=list)

        def __iter__(self):
            # Allows ``bucket_id, size, filenames = bucket`` unpacking.
            return iter((self.bucket_id, self.size, self.filenames))

    buckets: list[BucketData] = [BucketData(i + 1) for i in range(buckets_count)]

    # Walk the input with a cursor rather than list.pop(0), which would
    # shift the whole remaining list on every file.
    cursor = 0
    file_count = len(input_list)
    for bucket in buckets:
        while cursor < file_count:
            next_size, next_fn = input_list[cursor]
            diff_now = abs(ideal_bucket_size - bucket.size)
            diff_next = abs(ideal_bucket_size - bucket.size - next_size)
            # Stop filling once the next file would not bring the bucket
            # closer to the ideal size; a bucket whose size is still 0
            # always accepts the next file.
            if bucket.size != 0 and diff_now <= diff_next:
                break
            bucket.size += next_size
            bucket.filenames.append(str(next_fn))
            cursor += 1

    # Whatever did not fit anywhere goes into the last bucket.
    last_bucket = buckets[-1]
    for leftover_size, leftover_fn in input_list[cursor:]:
        last_bucket.size += leftover_size
        last_bucket.filenames.append(str(leftover_fn))

    for bucket_id, size, filenames in sorted(buckets, key=lambda b: b.size, reverse=True):
        print(f"Bucket {bucket_id:>2} size: {size:>8}, distance from ideal: {ideal_bucket_size - size:>8}", file=sys.stderr)
        output_list_file = Path(output_name_template.replace("%", str(bucket_id)))
        print(output_list_file)
        with output_list_file.open("w") as f:
            # One name per line; an empty bucket yields an empty file rather
            # than a single blank line.
            f.writelines(f"{name}\n" for name in filenames)

    return 0
|
||
# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters