Skip to content

Commit

Permalink
Parallel builds with concatenated .cpp
Browse files Browse the repository at this point in the history
  • Loading branch information
Mariusz Glebocki committed Oct 30, 2023
1 parent b83b997 commit 6fac0c3
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 10 deletions.
93 changes: 93 additions & 0 deletions bin/verilator_includer2
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/usr/bin/env python3
# pylint: disable=C0114,C0209
#
# Copyright 2003-2023 by Wilson Snyder. This program is free software; you
# can redistribute it and/or modify the Verilator internals under the terms
# of either the GNU Lesser General Public License Version 3 or the Perl
# Artistic License Version 2.0.
#
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
######################################################################

import sys
from dataclasses import dataclass, field
from pathlib import Path

@dataclass(slots = True)
class BucketData:
bucket_id: int
size: int = 0
filenames: list[str] = field(default_factory=list)

def __iter__(self):
return iter((self.bucket_id, self.size, self.filenames))


def get_list(fn: Path) -> tuple[int, list[tuple[int, Path]]]:
    """Read a whitespace-separated list of file paths and look up each file's size.

    Args:
        fn: Path of the list file; its content is split on any whitespace.

    Returns:
        ``(total_size, entries)`` where ``entries`` holds one ``(size, path)``
        pair per listed file, in the order they appear in the list file, and
        ``total_size`` is the sum of all sizes (``st_size`` of each file).

    Raises:
        OSError: if the list file cannot be opened or a listed file cannot be
            stat'ed.
    """
    with fn.open("r") as list_file:
        # str.split() without arguments never yields empty or padded tokens.
        names = list_file.read().split()

    entries: list[tuple[int, Path]] = []
    for name in names:
        path = Path(name)
        entries.append((path.stat().st_size, path))

    total_size = sum(size for size, _ in entries)
    return (total_size, entries)


def main():
    """Distribute the listed files over N buckets and write one include file per bucket.

    Command line: ``<input_list_file> <buckets_count> <output_name_template>``

    * ``input_list_file``: whitespace-separated file paths, read by get_list().
    * ``buckets_count``: positive number of output files to write.
    * ``output_name_template``: output path; every ``%`` in it is replaced by
      the bucket number (1..buckets_count).

    Files are assigned in input order. Each bucket keeps taking the next file
    while it is empty or while doing so brings its size closer to the ideal
    bucket size; whatever is left after the last bucket goes into the last
    bucket. Every bucket is written, even an empty one, as lines of the form
    ``#include <file>``.

    Returns:
        0 on success.

    Raises:
        ValueError: if an argument is missing, the bucket count is not a
            positive integer, or the template contains no ``%``.
    """
    # Fail with a usage message instead of a bare IndexError on a short command line.
    if len(sys.argv) < 4:
        raise ValueError(
            "Usage: verilator_includer2 <input_list_file> <buckets_count> <output_name_template>")

    input_list_file = Path(sys.argv[1])
    buckets_count = int(sys.argv[2])
    if buckets_count <= 0:
        raise ValueError(f"Arg 2: Expected buckets count, got: {sys.argv[2]}")
    output_name_template = sys.argv[3]
    if "%" not in output_name_template:
        raise ValueError(f"Arg 3: template must contain '%': {sys.argv[3]}")

    total_size, input_list = get_list(input_list_file)

    # First estimate: an even split of everything.
    ideal_bucket_size = total_size // buckets_count

    # Files larger than that estimate are excluded from the second estimate,
    # which spreads the remaining bytes over the remaining buckets.
    huge_sizes = [size for size, _ in input_list if size > ideal_bucket_size]
    ideal_bucket_size = (max(1, total_size - sum(huge_sizes))
                         // max(1, buckets_count - len(huge_sizes)))

    buckets: list[BucketData] = [BucketData(i + 1) for i in range(buckets_count)]

    # Walk the input with an index rather than list.pop(0), which is O(n) per call.
    next_index = 0
    for bucket in buckets:
        while next_index < len(input_list):
            next_size, next_fn = input_list[next_index]
            diff_now = abs(ideal_bucket_size - bucket.size)
            diff_next = abs(ideal_bucket_size - bucket.size - next_size)
            # Stop once the bucket is non-empty and another file would not
            # bring it closer to the ideal size.
            if bucket.size != 0 and diff_now <= diff_next:
                break
            bucket.size += next_size
            bucket.filenames.append(str(next_fn))
            next_index += 1

    # Anything not placed by the loop above ends up in the last bucket.
    for next_size, next_fn in input_list[next_index:]:
        buckets[-1].size += next_size
        buckets[-1].filenames.append(str(next_fn))

    # Write order does not affect the result, so buckets are written in id order.
    for bucket in buckets:
        output_list_file = Path(output_name_template.replace("%", str(bucket.bucket_id)))
        with output_list_file.open("w") as f:
            f.write("\n".join([f"#include <{fn}>" for fn in bucket.filenames]) + "\n")

    return 0

# Run main() only when executed as a script; its return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
48 changes: 38 additions & 10 deletions include/verilated.mk.in
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ CFG_LDLIBS_THREADS = @CFG_LDLIBS_THREADS@

# Helper scripts located in $(VERILATOR_ROOT)/bin, each invoked through the
# interpreter variable it is prefixed with ($(PERL) or $(PYTHON3)).
VERILATOR_COVERAGE = $(PERL) $(VERILATOR_ROOT)/bin/verilator_coverage
VERILATOR_INCLUDER = $(PYTHON3) $(VERILATOR_ROOT)/bin/verilator_includer
VERILATOR_INCLUDER2 = $(PYTHON3) $(VERILATOR_ROOT)/bin/verilator_includer2
VERILATOR_CCACHE_REPORT = $(PYTHON3) $(VERILATOR_ROOT)/bin/verilator_ccache_report

######################################################################
Expand Down Expand Up @@ -202,20 +203,46 @@ VK_GLOBAL_OBJS = $(addsuffix .o, $(VM_GLOBAL_FAST) $(VM_GLOBAL_SLOW))
# Every global object lists $(VM_PREFIX).mk as a prerequisite, so a change to
# that makefile makes the objects out of date.
$(VK_GLOBAL_OBJS): $(VM_PREFIX).mk

ifneq ($(VM_PARALLEL_BUILDS),1)
  # Fast build for small designs: All .cpp files in one fell swoop. This
  # saves total compute, but can be slower if only a little changes. It is
  # also a lot slower for medium to large designs when the speed of the C
  # compiler dominates, which in this mode is not parallelizable.

  VK_OBJS += $(VM_PREFIX)__ALL.o

  # Run verilator_includer over all fast and slow sources and capture its
  # output as the single combined .cpp file.
  $(VM_PREFIX)__ALL.cpp: $(addsuffix .cpp, $(VM_FAST) $(VM_SLOW))
	$(VERILATOR_INCLUDER) -DVL_INCLUDE_OPT=include $^ > $@

  .PHONY: all_cpp
  all_cpp: $(VM_PREFIX)__ALL.cpp
else
  # TODO(mglb): rename to something related to .cpp files concatenation
  # VM_PARALLEL_JOBS ?= 20

  ifneq ($(filter-out 0 1,$(VM_PARALLEL_JOBS)),)
    # VM_PARALLEL_JOBS is set to something other than 0 or 1: concatenate the
    # fast and the slow sources into VM_PARALLEL_JOBS .cpp files each, and
    # point VK_OBJS_FAST / VK_OBJS_SLOW at the objects built from those files.

    _VK_JOB_IDS := $(shell seq 1 $(VM_PARALLEL_JOBS))

    .INTERMEDIATE: fast.list slow.list
    fast.list: $(VM_FAST:%=%.cpp)
    slow.list: $(VM_SLOW:%=%.cpp)

    # Write the prerequisite names into the list file without spawning a
    # shell; $(file ...) requires GNU Make 4.0 or newer.
    fast.list slow.list:
	$(file >$@,$(strip $^))

    # A pattern rule with several targets runs its recipe once for all of
    # them: one verilator_includer2 call writes every <stem>_<id>_<jobs>.cpp.
    # The literal '%' in the last argument is the placeholder the script
    # replaces with the bucket number; $* is the stem ("fast" or "slow").
    $(foreach id,$(_VK_JOB_IDS),%_$(id)_$(VM_PARALLEL_JOBS).cpp): %.list
	$(VERILATOR_INCLUDER2) $< $(VM_PARALLEL_JOBS) $*_%_$(VM_PARALLEL_JOBS).cpp

    VK_OBJS_FAST = $(foreach job_id,$(_VK_JOB_IDS),fast_$(job_id)_$(VM_PARALLEL_JOBS).o)
    VK_OBJS_SLOW = $(foreach job_id,$(_VK_JOB_IDS),slow_$(job_id)_$(VM_PARALLEL_JOBS).o)

  endif

  # Parallel build: Each .cpp file by itself. This can be somewhat slower for
  # very small designs and examples, but is a lot faster for large designs.

  VK_OBJS += $(VK_OBJS_FAST) $(VK_OBJS_SLOW)
endif

# When archiving just objects (.o), use single $(AR) run
Expand Down Expand Up @@ -344,6 +371,7 @@ debug-make::
@echo VM_GLOBAL_FAST: $(VM_GLOBAL_FAST)
@echo VM_GLOBAL_SLOW: $(VM_GLOBAL_SLOW)
@echo VM_PARALLEL_BUILDS: $(VM_PARALLEL_BUILDS)
@echo VM_PARALLEL_JOBS: $(VM_PARALLEL_JOBS)
@echo VM_PREFIX: $(VM_PREFIX)
@echo VM_SUPPORT_FAST: $(VM_SUPPORT_FAST)
@echo VM_SUPPORT_SLOW: $(VM_SUPPORT_SLOW)
Expand Down

0 comments on commit 6fac0c3

Please sign in to comment.