From 03c09d9262e53e2359b1f74e42cab20bc93b47a8 Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Wed, 31 Jan 2024 16:07:43 -0800 Subject: [PATCH] Add option to separate debug information (DWARF) from executable (#1973) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building Debug or RelWithDebInfo, the size of the binaries (i.e. `stablehlo-opt`) can be quite beefy. I noticed it can be up to **2GiB** in size and attaching GDB is quite slow -- in fact loading the process to run is slow. _This is much larger and more noticeable than the Bazel build._ This change introduces a common idiom to speed up large binaries by separating the debug information (DWARF) into separate files (`*.dwo`). In order to speed up GDB attachment, a gdb-index is created and stored in the file itself. One could achieve the same effect with `.gdbinit` settings; however, the index files are never pruned and it's a bit cumbersome to rely on developers to set it up. For reference here is a possible `.gdbinit` ``` # History. set history filename ~/.gdb_history set history save on set history size 100000 # Makes multiple invocations much faster set index-cache on # Allow per-project gdbinit files set auto-load local-gdbinit on add-auto-load-safe-path / ``` **Note**: Looks like you also need to build MLIR with separate dwarfs as well. To be honest, the interplay between settings on LLVM and then on StableHLO is not very clear at times. Some settings seem to propagate based on how the previous object code was created and others did not. Specifically for GDB launch, you can see a very big time saving in the below benchmark. **76s vs 1.3s** ## Inline debug info **Size**: 3.2GiB ```shell ❯ ll -h bin/stablehlo-opt Permissions Size User Date Modified Name .rwxr-xr-x 3.2G 780412 31 Jan 18:14 bin/stablehlo-opt ``` Most of the space is the debug information. 
```shell ❯ /google/bin/releases/protobuf-team/bloaty/bloaty bin/stablehlo-opt --allow_unsafe_non_google3_input FILE SIZE VM SIZE -------------- -------------- 60.2% 1.80Gi 0.0% 0 .debug_info 12.4% 378Mi 0.0% 0 .debug_str 10.3% 314Mi 0.0% 0 .debug_loclists 5.9% 180Mi 0.0% 0 .debug_line ``` **Benchmark** ``` # benchmark just running the build ❯ hyperfine './build/bin/stablehlo-opt --version' Benchmark 1: ./build/bin/stablehlo-opt --version Time (mean ± σ): 181.6 ms ± 3.8 ms [User: 73.4 ms, System: 108.2 ms] Range (min … max): 173.8 ms … 188.3 ms 16 runs # benchmark with GDB ❯ hyperfine 'gdb -ex run --args ./build/bin/stablehlo-opt --version' --warmup 1 --runs 3 Benchmark 1: gdb -ex run --args ./build/bin/stablehlo-opt --version Time (mean ± σ): 74.063 s ± 2.381 s [User: 71.044 s, System: 3.958 s] Range (min … max): 72.361 s … 76.784 s 3 runs ``` ## With separate debug info **Size**: 847M ```shell ❯ ll -h build/bin/stablehlo-opt Permissions Size User Date Modified Name .rwxr-xr-x 847M 780412 31 Jan 17:29 build/bin/stablehlo-opt ``` Much of the space is now a `gdb-index` to make attaching to the debugger much faster. ```shell ❯ /google/bin/releases/protobuf-team/bloaty/bloaty build/bin/stablehlo-opt --allow_unsafe_non_google3_input FILE SIZE VM SIZE -------------- -------------- 38.0% 306Mi 0.0% 0 .gdb_index 24.2% 195Mi 0.0% 0 .debug_addr 20.6% 166Mi 0.0% 0 .debug_line 6.5% 52.5Mi 55.5% 52.5Mi .text ``` **Benchmark**: ``` # benchmark just running the build ❯ hyperfine './build/bin/stablehlo-opt --version' Benchmark 1: ./build/bin/stablehlo-opt --version Time (mean ± σ): 38.6 ms ± 1.6 ms [User: 15.7 ms, System: 23.1 ms] Range (min … max): 36.5 ms … 44.6 ms 68 runs # benchmark with GDB ❯ hyperfine 'gdb -ex run --args ./build/bin/stablehlo-opt --version' Benchmark 1: gdb -ex run --args ./build/bin/stablehlo-opt --version Time (mean ± σ): 1.401 s ± 0.037 s [User: 1.532 s, System: 0.883 s] Range (min … max): 1.345 s … 1.445 s 10 runs ``` --- CMakeLists.txt | 18 
++++++++++++++++++ README.md | 24 ++++++++++++++++++++++++ build_tools/build_mlir.sh | 1 + cmake/SetupSanitizers.cmake | 2 +- 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d36b82edf04..2af226099f2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,8 @@ option(STABLEHLO_BUILD_EMBEDDED "Build StableHLO as part of another project" OFF option(STABLEHLO_ENABLE_BINDINGS_PYTHON "Enables StableHLO Python bindings" OFF) option(STABLEHLO_ENABLE_STRICT_BUILD "Build StableHLO with strict warnings and warnings as errors" OFF) option(STABLEHLO_ENABLE_SANITIZER "Enable a sanitizer [OFF, address]" OFF) +option(STABLEHLO_ENABLE_SPLIT_DWARF "Enable split DWARF if the platform supports it" OFF) +option(STABLEHLO_ENABLE_LLD "Use LLD as the linker if available" OFF) #------------------------------------------------------------------------------- # Project setup and globals @@ -128,6 +130,22 @@ if(LLVM_ENABLE_ZLIB) find_package(ZLIB) endif() +#------------------------------------------------------------------------------- +# Performance configuration +#------------------------------------------------------------------------------- + +include(CheckCXXCompilerFlag) +include(CheckLinkerFlag) +add_link_options("$<$<BOOL:${STABLEHLO_ENABLE_LLD}>:-fuse-ld=lld>") +if(STABLEHLO_ENABLE_SPLIT_DWARF) + check_cxx_compiler_flag(-gsplit-dwarf STABLEHLO_SUPPORTS_SPLIT_DWARF) + add_compile_options("$<$<BOOL:${STABLEHLO_SUPPORTS_SPLIT_DWARF}>:-gsplit-dwarf;-ggnu-pubnames>") + check_linker_flag(CXX "-Wl,--gdb-index" STABLEHLO_SUPPORTS_GDB_INDEX) + # If we set LLD it doesn't seem to affect the check_linker_flag above. 
+ # Account for it with the generator expression OR + add_link_options("$<$<OR:$<BOOL:${STABLEHLO_SUPPORTS_GDB_INDEX}>,$<BOOL:${STABLEHLO_ENABLE_LLD}>>:-Wl,--gdb-index>") +endif() + include(TableGen) include(AddLLVM) include(AddMLIR) diff --git a/README.md b/README.md index 83fe996bae2..d630e776b57 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,30 @@ Here's how to build the StableHLO repo on Linux or macOS: -DMLIR_DIR=${PWD}/../llvm-build/lib/cmake/mlir ``` + If you are actively developing StableHLO, you may want the following additional + CMake settings: + + ```shell + cmake .. -GNinja \ + -DSTABLEHLO_ENABLE_LLD=ON \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DLLVM_ENABLE_ASSERTIONS=ON \ + -DSTABLEHLO_ENABLE_BINDINGS_PYTHON=OFF \ + -DSTABLEHLO_ENABLE_SPLIT_DWARF=ON \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DSTABLEHLO_ENABLE_SANITIZER=address \ + -DMLIR_DIR=${PWD}/../llvm-build/lib/cmake/mlir + ``` + + This will enable debug symbols and ccache, which can speed up incremental + builds. It also creates a GDB index file in the binary to speed up + debugging. + + If you build MLIR using the script above it should also set by default + `LLVM_USE_SPLIT_DWARF` which does the majority of the size saving for + the binary and should also be set. + 7. 
Now you can make sure it works by running some tests: ```sh diff --git a/build_tools/build_mlir.sh b/build_tools/build_mlir.sh index e56183e65ac..75f6ae51590 100755 --- a/build_tools/build_mlir.sh +++ b/build_tools/build_mlir.sh @@ -56,6 +56,7 @@ cmake -GNinja \ -DLLVM_BUILD_TOOLS=OFF \ -DLLVM_INCLUDE_TESTS=OFF \ -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \ + -DLLVM_USE_SPLIT_DWARF=ON \ -DLLVM_ENABLE_ASSERTIONS=ON cmake --build "$build_dir" --target all diff --git a/cmake/SetupSanitizers.cmake b/cmake/SetupSanitizers.cmake index df0f6cce4f2..5c1debb76c1 100644 --- a/cmake/SetupSanitizers.cmake +++ b/cmake/SetupSanitizers.cmake @@ -47,7 +47,7 @@ function(setup_sanitizers) if (STABLEHLO_ENABLE_SANITIZER_LOWERCASE STREQUAL "address") add_compile_options(-fsanitize=address -fsanitize=undefined -fsanitize=leak -fno-omit-frame-pointer) - link_libraries(-fsanitize=address -fsanitize=undefined -fsanitize=leak) + add_link_options(-fsanitize=address -fsanitize=undefined -fsanitize=leak) else () message(FATAL_ERROR "Unknown sanitizer type: ${STABLEHLO_ENABLE_SANITIZER}") endif ()