Skip to content

Commit

Permalink
Add IWYU to CI (#17078)
Browse files Browse the repository at this point in the history
This PR adds [`include-what-you-use`](https://github.com/include-what-you-use/include-what-you-use/) to the CI job running clang-tidy. Like clang-tidy, IWYU runs via CMake integration and only runs on cpp files, not cu files. This should help us shrink binaries and reduce compilation times in cases where headers are being included unnecessarily, and it helps keep our include lists clean. The IWYU suggestions for additions are quite noisy and the team determined this to be unnecessary, so this PR instead post-filters the outputs to only show the removals. The final suggestions are uploaded to a file that is uploaded to the GHA page so that it can be downloaded, inspected, and easily applied locally.

Resolves #581.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - David Wendt (https://github.com/davidwendt)
  - Yunsong Wang (https://github.com/PointKernel)
  - James Lamb (https://github.com/jameslamb)
  - Karthikeyan (https://github.com/karthikeyann)

URL: #17078
  • Loading branch information
vyasr authored Nov 8, 2024
1 parent 18041b5 commit 7b80a44
Show file tree
Hide file tree
Showing 8 changed files with 271 additions and 47 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
run_script: "ci/clang_tidy.sh"
run_script: "ci/cpp_linters.sh"
file_to_upload: iwyu_results.txt
conda-python-cudf-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
Expand Down
29 changes: 0 additions & 29 deletions ci/clang_tidy.sh

This file was deleted.

47 changes: 47 additions & 0 deletions ci/cpp_linters.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.

set -euo pipefail

rapids-logger "Create checks conda environment"
. /opt/conda/etc/profile.d/conda.sh

ENV_YAML_DIR="$(mktemp -d)"

rapids-dependency-file-generator \
--output conda \
--file-key clang_tidy \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml"

rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n clang_tidy

# Temporarily allow unbound variables for conda activation.
set +u
conda activate clang_tidy
set -u

RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)"

source rapids-configure-sccache

# TODO: For testing purposes, clone and build IWYU. We can switch to a release
# once a clang 19-compatible version is available, which should be soon
# (https://github.com/include-what-you-use/include-what-you-use/issues/1641).
git clone --depth 1 https://github.com/include-what-you-use/include-what-you-use.git
pushd include-what-you-use
# IWYU's CMake build uses some Python scripts that assume that the cwd is
# importable, so support that legacy behavior.
export PYTHONPATH=${PWD}:${PYTHONPATH:-}
cmake -S . -B build -GNinja --install-prefix=${CONDA_PREFIX}
cmake --build build
cmake --install build
popd

# Run the build via CMake, which will run clang-tidy when CUDF_STATIC_LINTERS is enabled.
cmake -S cpp -B cpp/build -DCMAKE_BUILD_TYPE=Release -DCUDF_STATIC_LINTERS=ON -GNinja
cmake --build cpp/build 2>&1 | python cpp/scripts/parse_iwyu_output.py

# Remove invalid components of the path for local usage. The path below is
# valid in the CI due to where the project is cloned, but presumably the fixes
# will be applied locally from inside a clone of cudf.
sed -i 's/\/__w\/cudf\/cudf\///' iwyu_results.txt
4 changes: 2 additions & 2 deletions cpp/.clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ Checks:
-clang-analyzer-optin.core.EnumCastOutOfRange,
-clang-analyzer-optin.cplusplus.UninitializedObject'

WarningsAsErrors: '*'
HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
WarningsAsErrors: ''
HeaderFilterRegex: '.*cudf/cpp/(src|include).*'
ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
FormatStyle: none
CheckOptions:
Expand Down
58 changes: 44 additions & 14 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ option(
${DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL}
)
mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)
option(CUDF_CLANG_TIDY "Enable clang-tidy checking" OFF)
option(CUDF_STATIC_LINTERS "Enable static linters during compilation" OFF)

message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}")
message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
Expand Down Expand Up @@ -146,8 +146,10 @@ if(NOT CUDF_GENERATED_INCLUDE_DIR)
endif()

# ##################################################################################################
# * clang-tidy configuration ----------------------------------------------------------------------
if(CUDF_CLANG_TIDY)
# * linter configuration ---------------------------------------------------------------------------
if(CUDF_STATIC_LINTERS)
# For simplicity, for now we assume that all linters can be installed into an environment where
# any linter is being run. We could relax this requirement if desired.
find_program(
CLANG_TIDY_EXE
NAMES "clang-tidy"
Expand All @@ -174,24 +176,48 @@ if(CUDF_CLANG_TIDY)
"clang-tidy version ${expected_clang_tidy_version} is required, but found ${LLVM_VERSION}"
)
endif()

find_program(IWYU_EXE NAMES include-what-you-use iwyu REQUIRED)
endif()

# Turn on the clang-tidy property for a target excluding the files specified in SKIPPED_FILES.
function(enable_clang_tidy target)
set(_tidy_options)
function(enable_static_checkers target)
set(_tidy_options IWYU CLANG_TIDY)
set(_tidy_one_value)
set(_tidy_multi_value SKIPPED_FILES)
cmake_parse_arguments(
_TIDY "${_tidy_options}" "${_tidy_one_value}" "${_tidy_multi_value}" ${ARGN}
_LINT "${_tidy_options}" "${_tidy_one_value}" "${_tidy_multi_value}" ${ARGN}
)

if(CUDF_CLANG_TIDY)
# clang will complain about unused link libraries on the compile line unless we specify
# -Qunused-arguments.
set_target_properties(
${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments"
)
foreach(file IN LISTS _TIDY_SKIPPED_FILES)
if(CUDF_STATIC_LINTERS)
if(_LINT_CLANG_TIDY)
# clang will complain about unused link libraries on the compile line unless we specify
# -Qunused-arguments.
set_target_properties(
${target} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_EXE};--extra-arg=-Qunused-arguments"
)
endif()
if(_LINT_IWYU)
# A few extra warnings pop up when building with IWYU. I'm not sure why, but they are not
# relevant since they don't show up in any other build so it's better to suppress them until
# we can figure out the cause. Setting this as part of CXX_INCLUDE_WHAT_YOU_USE does not
# appear to be sufficient, we must also ensure that it is set to the underlying target's CXX
# compile flags. To do this completely cleanly we should modify the flags on the target rather
# than the global CUDF_CXX_FLAGS, but this solution is good enough for now since we never run
# the linters on real builds.
foreach(_flag -Wno-missing-braces -Wno-absolute-value -Wunneeded-internal-declaration)
list(FIND CUDF_CXX_FLAGS "${flag}" _flag_index)
if(_flag_index EQUAL -1)
list(APPEND CUDF_CXX_FLAGS ${flag})
endif()
endforeach()
set(CUDF_CXX_FLAGS
"${CUDF_CXX_FLAGS}"
PARENT_SCOPE
)
set_target_properties(${target} PROPERTIES CXX_INCLUDE_WHAT_YOU_USE "${IWYU_EXE}")
endif()
foreach(file IN LISTS _LINT_SKIPPED_FILES)
set_source_files_properties(${file} PROPERTIES SKIP_LINTING ON)
endforeach()
endif()
Expand Down Expand Up @@ -771,11 +797,15 @@ set_target_properties(
INTERFACE_POSITION_INDEPENDENT_CODE ON
)

# Note: This must come before the target_compile_options below so that the function can modify the
# flags if necessary.
enable_static_checkers(
cudf SKIPPED_FILES src/io/comp/cpu_unbz2.cpp src/io/comp/brotli_dict.cpp CLANG_TIDY IWYU
)
target_compile_options(
cudf PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>"
"$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>"
)
enable_clang_tidy(cudf SKIPPED_FILES src/io/comp/cpu_unbz2.cpp src/io/comp/brotli_dict.cpp)

if(CUDF_BUILD_STACKTRACE_DEBUG)
# Remove any optimization level to avoid nvcc warning "incompatible redefinition for option
Expand Down
170 changes: 170 additions & 0 deletions cpp/scripts/parse_iwyu_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Helper script to modify IWYU output to only include removals.
Lines that are not from include-what-you-use are removed from the output.
"""

import argparse
import re
from enum import Enum


class Mode(Enum):
NORMAL = 0
ADD = 1
REMOVE = 2
FULL_INCLUDE_LIST = 3


def extract_include_file(include_line):
"""Extract the core file path from an #include directive."""
match = re.search(r'#include\s+[<"]([^">]+)[">]', include_line)
if match:
return match.group(1)
return None


def parse_output(input_stream):
include_modifications = {}
current_file = None
mode = Mode.NORMAL

for line in input_stream:
if match := re.match(r"(\/\S+) should add these lines:", line):
current_file = match.group(1)
include_modifications.setdefault(
current_file,
{
"add_includes": [],
"remove_includes": [],
"full_include_list": [],
},
)
mode = Mode.ADD
elif match := re.match(r"(\/\S+) should remove these lines:", line):
mode = Mode.REMOVE
elif match := re.match(r"The full include-list for (\/\S+):", line):
mode = Mode.FULL_INCLUDE_LIST
elif line.strip() == "---":
current_file = None
mode = Mode.NORMAL
else:
if current_file:
if mode == Mode.ADD:
include_modifications[current_file]["add_includes"].append(
line.strip()
)
elif mode == Mode.REMOVE:
include_modifications[current_file][
"remove_includes"
].append(line.strip())
elif mode == Mode.FULL_INCLUDE_LIST:
include_modifications[current_file][
"full_include_list"
].append(line.strip())
else:
if (
line.strip()
and "include-what-you-use reported diagnostics" not in line
and "In file included from" not in line
and "has correct #includes/fwd-decls" not in line
):
print(line, end="")

return include_modifications


def post_process_includes(include_modifications):
"""Deduplicate and remove redundant entries from add and remove includes."""
for mods in include_modifications.values():
# Deduplicate add_includes and remove_includes
mods["add_includes"] = list(set(mods["add_includes"]))
mods["remove_includes"] = list(set(mods["remove_includes"]))

# Extract file paths from add_includes and remove_includes
add_files = {
extract_include_file(line) for line in mods["add_includes"]
}
remove_files = {
extract_include_file(line) for line in mods["remove_includes"]
}

# Remove entries that exist in both add_includes and remove_includes
common_files = add_files & remove_files
mods["add_includes"] = [
line
for line in mods["add_includes"]
if extract_include_file(line) not in common_files
]
mods["remove_includes"] = [
line
for line in mods["remove_includes"]
if extract_include_file(line) not in common_files
]

# Remove entries that exist in add_includes from full_include_list
mods["full_include_list"] = [
include
for include in mods["full_include_list"]
if extract_include_file(include) not in add_files
]


def write_output(include_modifications, output_stream):
for filename, mods in include_modifications.items():
if mods["remove_includes"]:
# IWYU requires all sections to exist, so we write out this header even
# though we never write out any actual additions.
output_stream.write(f"{filename} should add these lines:\n\n")

output_stream.write(f"{filename} should remove these lines:\n")
for line in mods["remove_includes"]:
output_stream.write(line + "\n")
output_stream.write("\n")

output_stream.write(f"The full include-list for {filename}:\n")
for line in mods["full_include_list"]:
output_stream.write(line + "\n")
output_stream.write("---\n")


def main():
parser = argparse.ArgumentParser(
description="Process include modifications from a build output log."
)
parser.add_argument(
"input",
nargs="?",
type=argparse.FileType("r"),
default="-",
help="Input file to read (default: stdin)",
)
parser.add_argument(
"--output",
type=argparse.FileType("w"),
default="iwyu_results.txt",
help="Output file to write (default: iwyu_output.txt)",
)
args = parser.parse_args()

include_modifications = parse_output(args.input)
post_process_includes(include_modifications)
write_output(include_modifications, args.output)


if __name__ == "__main__":
main()
1 change: 0 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ function(ConfigureTest CMAKE_TEST_NAME)
"GTEST_CUDF_STREAM_MODE=new_${_CUDF_TEST_STREAM_MODE}_default;LD_PRELOAD=$<TARGET_FILE:cudf_identify_stream_usage_mode_${_CUDF_TEST_STREAM_MODE}>"
)
endif()
enable_clang_tidy(${CMAKE_TEST_NAME})
endfunction()

# ##################################################################################################
Expand Down
6 changes: 6 additions & 0 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,12 @@ dependencies:
packages:
- clang==19.1.0
- clang-tools==19.1.0
# TODO: These are build requirements for IWYU and can be replaced
# with IWYU itself once a conda package of IWYU supporting clang 19
# is available.
- clangdev==19.1.0
- llvm==19.1.0
- llvmdev==19.1.0
docs:
common:
- output_types: [conda]
Expand Down

0 comments on commit 7b80a44

Please sign in to comment.