diff --git a/.github/actions/nm-summary-test/action.yml b/.github/actions/nm-summary-test/action.yml index 9da08953dfc38..8e036f3d16ffe 100644 --- a/.github/actions/nm-summary-test/action.yml +++ b/.github/actions/nm-summary-test/action.yml @@ -22,6 +22,9 @@ inputs: test_status: description: 'status from test step' required: true + coverage_json: + description: 'file containing coverage report in JSON format' + required: true runs: using: composite steps: @@ -29,8 +32,6 @@ runs: TESTMO_URL=${{ inputs.testmo_run_url }} TEST_STATUS=${{ inputs.test_status }} TEST_EMOJI=$(./.github/scripts/step-status ${TEST_STATUS}) - echo "testmo URL: ${TESTMO_URL}" >> $GITHUB_STEP_SUMMARY - echo "" echo "| Parameter | |" >> $GITHUB_STEP_SUMMARY echo "|---|---|" >> $GITHUB_STEP_SUMMARY echo "| label: | \`${{ inputs.test_label }}\` |" >> $GITHUB_STEP_SUMMARY @@ -42,4 +43,14 @@ runs: echo "| whl: | ${{ inputs.whl }} |" >> $GITHUB_STEP_SUMMARY echo "| magic_wand: | ${{ inputs.magic_wand }} |" >> $GITHUB_STEP_SUMMARY echo "| test: | ${TEST_EMOJI} |" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "#### TestMo URL: ${TESTMO_URL}" >> $GITHUB_STEP_SUMMARY + echo "## Test Coverage" >> $GITHUB_STEP_SUMMARY + # coverage_report_breakdown.py requires the `tabulate` package + # to generate the markdown for the summary. 
+ pip3 install tabulate + # As a multiline response we cannot pass the table directly to github + # so redirect it to a file, then cat the file to the output + python3 ./.github/scripts/coverage_report_breakdown.py ${{ inputs.coverage_json }} > COVERAGE_MD + cat COVERAGE_MD >> $GITHUB_STEP_SUMMARY shell: bash diff --git a/.github/scripts/coverage_report_breakdown.py b/.github/scripts/coverage_report_breakdown.py new file mode 100644 index 0000000000000..3d920f4849595 --- /dev/null +++ b/.github/scripts/coverage_report_breakdown.py @@ -0,0 +1,174 @@ +# this script parses the provided coverage JSON file to report +# the results broken down into categories of interest. +import argparse +from collections import UserDict +from pathlib import Path +from typing import Optional + +import pandas as pd + + +class CoverageMetrics(UserDict): +    """ +    encapsulates code coverage metrics +    """ + +    def __init__(self, user_dict): +        super().__init__(user_dict) +        if "percent_covered_display" in self.data: +            del self.data["percent_covered_display"] + + +class CodeCoverage: +    """ +    reads and reports on code coverage data as generated by the coverage tool +    """ + +    def __init__(self, file_path: Path): +        self.format: Optional[int] = None +        self.version: Optional[str] = None +        self.timestamp: Optional[str] = None +        self.show_contexts: Optional[bool] = None +        self.branch_coverage: Optional[bool] = None +        self.overall_metrics: Optional[CoverageMetrics] = None +        self.tests: Optional[pd.Series] = None +        self.source: Optional[pd.Series] = None + +        if file_path.suffix == ".json": +            if not file_path.exists(): +                raise ValueError(f"{file_path} not found") +            self._from_json(file_path) +        else: +            raise ValueError("only coverage json reports are supported") + +    def _from_json(self, json_file_path: Path): +        """ +        loads the code coverage data from a JSON report generated with +        `coverage json` +        :param json_file_path: path to the file to load +        """ +        coverage_df = pd.read_json(json_file_path, orient="records") + 
self.format = coverage_df["meta"]["format"] + self.version = coverage_df["meta"]["version"] + self.timestamp = coverage_df["meta"]["timestamp"] + self.show_contexts = coverage_df["meta"]["show_contexts"] + self.branch_coverage = coverage_df["meta"]["branch_coverage"] + self.overall_metrics = CoverageMetrics( + coverage_df["totals"].dropna().to_dict()) + + # segment the list of files by test cases and source code + files_df = coverage_df.loc[:, ['files']].dropna() + self.tests = files_df.iloc[files_df.index.str.startswith("tests/")] + self.source = files_df[~files_df.index.isin(self.tests.index)] + + # add a column to the list of source files to facilitate grouping + # metrics by top level directories under vllm + def get_sub_dir(file_path): + file_parts = Path(file_path).parts + subdir = file_parts[file_parts.index("vllm") + 1] + if subdir == Path(file_path).name: + # we're at the root of the vllm dir, so leave subdir empty + subdir = "" + return subdir + + # temporarily move the index to a "filepath" column + self.source.reset_index(names="filepath", inplace=True) + # extract subdirectories under vllm, put into the sub_dir column + self.source.loc[:, "sub_dir"] = self.source.loc[:, "filepath"].apply( + get_sub_dir) + # make the filepath column the index again + self.source.set_index("filepath", inplace=True) + + @staticmethod + def _calculate_metrics(coverage_data: pd.Series) -> CoverageMetrics: + """ + common method to calculate metrics + """ + metrics_dict = {} + for metric in [ + "covered_lines", "num_statements", "missing_lines", + "excluded_lines" + ]: + metrics_dict[metric] = sum(d[0]["summary"][metric] + for d in coverage_data) + metrics_dict["percent_covered"] = metrics_dict[ + "covered_lines"] / metrics_dict["num_statements"] * 100 + return CoverageMetrics(metrics_dict) + + def tests_metrics(self) -> CoverageMetrics: + """ + creates summary metrics for all tests + """ + return self._calculate_metrics(self.tests.values) + + def source_metrics(self, 
sub_dir: Optional[str] = None) -> CoverageMetrics: +        """ +        creates summary metrics for the requested vllm subdirectory, +        or for the reported vllm source if a subdirectory is not specified. +        sub_dir = "" will report for files directly under vllm +        """ +        data = self.source +        if sub_dir is not None: +            data = self.source[self.source["sub_dir"] == sub_dir] + +        return self._calculate_metrics(data.values) + +    def to_github_markdown(self) -> str: +        """ +        returns a string in the form of github compatible markdown with top +        level and drill down metrics. +        """ +        # make a dataframe with top level metric summary info +        overall_metrics = self.overall_metrics +        overall_metrics["Collection"] = "Overall" +        test_metrics = self.tests_metrics() +        test_metrics["Collection"] = "Test Code" +        source_metrics = self.source_metrics() +        source_metrics["Collection"] = "Source Code" +        summary_df = pd.DataFrame( +            [overall_metrics, test_metrics, source_metrics]) +        # make percent_covered value compatible with the string "%" formatting +        summary_df["percent_covered"] = summary_df["percent_covered"] / 100 + +        # compose a set of the subdirectory breakdown summary info +        breakdown_list = [] +        for sub_dir in sorted(self.source["sub_dir"].unique()): +            sub_dir_metrics = self.source_metrics(sub_dir) +            label = "vllm 'root'" if sub_dir == "" else sub_dir +            sub_dir_metrics["Collection"] = label +            breakdown_list.append(sub_dir_metrics) +        breakdown_df = pd.DataFrame(breakdown_list) +        # make percent_covered value compatible with the string "%" formatting +        breakdown_df["percent_covered"] = breakdown_df["percent_covered"] / 100 + +        # join the top level and breakdown data with separator rows between them +        # add a separator row and subtitle row +        empty_row_df = pd.Series( +            pd.NA, index=summary_df.columns).to_frame().transpose() +        header_row_df = empty_row_df.copy() +        header_row_df["Collection"] = "vllm Subdirs" +        summary_df = pd.concat( +            [summary_df, empty_row_df, header_row_df, breakdown_df], + 
ignore_index=True) + # clean up the `nan` values for display purposes + summary_df = summary_df.astype(str) + summary_df.replace({"nan": None}, inplace=True) + + return summary_df.to_markdown(index=False, + tablefmt="github", + missingval="", + floatfmt=(".0f", ".0f", ".0f", ".0f", + ".0f", ".1%"), + colalign=("left", "right", "right", + "right", "right", "decimal")) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("coverage_json_file", + type=str, + help="file path to coverage JSON output") + args = parser.parse_args() + cc = CodeCoverage(Path(args.coverage_json_file)) + + print(cc.to_github_markdown()) diff --git a/.github/scripts/run-tests b/.github/scripts/run-tests index e2e12772d0816..2e0742e0350a1 100755 --- a/.github/scripts/run-tests +++ b/.github/scripts/run-tests @@ -69,7 +69,7 @@ done # run selected tests SUCCESS=0 -CC_PYTEST_FLAGS="--cov=${SRC_DIR} --cov=${TEST_DIR} --cov-report=html:cc-vllm-html --cov-append" +CC_PYTEST_FLAGS="--cov=${SRC_DIR} --cov=${TEST_DIR} --cov-report=html:cc-vllm-html --cov-report=json:cc-vllm.json --cov-append" for TEST in "${TESTS_FOUND[@]}" do LOCAL_SUCCESS=0 diff --git a/.github/workflows/nm-test.yml b/.github/workflows/nm-test.yml index 01d6fa96730d3..887b236190b46 100644 --- a/.github/workflows/nm-test.yml +++ b/.github/workflows/nm-test.yml @@ -124,7 +124,6 @@ jobs: uses: ./.github/actions/nm-install-whl/ with: python: ${{ inputs.python }} - venv: - name: run buildkite script run: | @@ -151,6 +150,14 @@ jobs: path: cc-vllm-html retention-days: 15 + - name: upload code coverage json + uses: actions/upload-artifact@v4 + if: success() || failure() + with: + name: cc-vllm-json-${{ inputs.test_label }}-${{ inputs.python }} + path: cc-vllm.json + retention-days: 5 + - name: report test results id: report_test uses: ./.github/actions/nm-testmo-run-submit-thread/ @@ -173,6 +180,7 @@ jobs: whl: ${{ steps.test.outputs.whl }} magic_wand: ${{ steps.test.outputs.magic_wand }} test_status: ${{ 
steps.test.outputs.status }} + coverage_json: cc-vllm.json - name: complete testmo run uses: ./.github/actions/nm-testmo-run-complete/ diff --git a/pyproject.toml b/pyproject.toml index d1bcf81d3f763..5b7666d2830cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,6 +101,7 @@ exclude_also = [ "if __name__ == .__main__.:", "if TYPE_CHECKING:", "@(abc\\.)?abstractmethod", + "@overload", # coverage for these devices is to be ignored until we plan to deploy them 'engine_config.device_config.device_type == "cpu"', 'engine_config.device_config.device_type == "neuron"', @@ -114,6 +115,8 @@ exclude_also = [ "def _shared_pointers", "def np_cache_weights_iterator", "def convert_pyslice_to_tensor", + "def convert_bin_to_safetensor_file", + "if is_usage_stats_enabled" ] [tool.coverage.html]