From fb859688c64930b8e70fd3a41ab4160b9a77ab31 Mon Sep 17 00:00:00 2001 From: Christopher Bate Date: Wed, 4 Dec 2024 22:41:00 -0700 Subject: [PATCH] [mlir-tensorrt] Add additional missing StableHLO patch (#425) Added a patch that was missing from the last StableHLO upgrade. This patch addresses some issues mentioned in https://github.com/openxla/stablehlo/issues/2634. An additional test is added to mlir-tensorrt as a regression test. --- .github/workflows/mlir-tensorrt-ci.yml | 289 +----------------- mlir-tensorrt/CMakeLists.txt | 1 + ...sing-checks-for-static-shapes-in-sta.patch | 118 +++++++ .../build_tools/scripts/cicd_build.sh | 32 +- .../TensorRTToTensorRTRuntime/CMakeLists.txt | 2 +- .../Dialect/Plan/segmentation-pipeline.mlir | 22 +- .../compiler_api/test_options_context.py | 2 +- 7 files changed, 168 insertions(+), 298 deletions(-) create mode 100644 mlir-tensorrt/build_tools/patches/stablehlo/0002-Fix-a-couple-missing-checks-for-static-shapes-in-sta.patch diff --git a/.github/workflows/mlir-tensorrt-ci.yml b/.github/workflows/mlir-tensorrt-ci.yml index e9ff1e611..3b6a1ad4a 100644 --- a/.github/workflows/mlir-tensorrt-ci.yml +++ b/.github/workflows/mlir-tensorrt-ci.yml @@ -17,7 +17,7 @@ env: jobs: mlir-tensorrt-test-pr: - if: github.event_name == 'pull_request' && github.event.pull_request.draft == false + if: github.event_name != 'pull_request' || github.event.pull_request.draft == false # `ubuntu-latest` is a CPU runner. # If selected, tests requiring GPU are not run. runs-on: ubuntu-latest @@ -110,320 +110,55 @@ jobs: - name: Create cache folder run: | mkdir -p ${{ github.workspace }}/ccache - mkdir -p ${{ github.workspace }}/.ccache.cpm + mkdir -p ${{ github.workspace }}/.cache.cpm # Create cache action - name: Create cache action id: core-build-cache uses: actions/cache@v4 with: - key: ${{ runner.os }}-mlir-tensorrt-core-build + key: ${{ runner.os }}-mlir-tensorrt-cache-${{ hashFiles('mlir-tensorrt/**/*.cpp', 'mlir-tensorrt/**/*.h', 'mlir-tensorrt/build_tools/**/*') }} + restore-keys: | + ${{ runner.os }}-mlir-tensorrt-cache- path: | ${{ github.workspace }}/ccache - ${{ github.workspace }}/.ccache.cpm + ${{ github.workspace }}/.cache.cpm # Run LIT tests with TensorRT 10 - name: Run MLIR-TensorRT lit tests with TensorRT 10 uses: addnab/docker-run-action@v3 with: image: ${{ env.DEFAULT_IMAGE }} - options: -v ${{ github.workspace }}/mlir-tensorrt:/mlir-tensorrt -v ${{ github.workspace }}/ccache:/ccache -v ${{ github.workspace }}/.ccache.cpm:/.ccache.cpm + options: -v ${{ github.workspace }}/mlir-tensorrt:/mlir-tensorrt -v ${{ github.workspace }}/ccache:/ccache -v ${{ github.workspace }}/.cache.cpm:/.cache.cpm registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} run: | - export CCACHE_DIR="/ccache" - export CCACHE_MAXSIZE=10G - ccache --zero-stats || true - ccache --show-stats || true - - cd mlir-tensorrt - cat > build_and_test.sh < build_and_test.sh < build_and_test.sh < build_and_test.sh < build_and_test.sh < build_and_test.sh < +Date: Wed, 27 Nov 2024 00:10:11 +0000 +Subject: [PATCH] Fix a couple missing checks for static shapes in + `stablehlo-aggressive-folder` + +--- + .../stablehlo_aggressive_folder.mlir | 27 +++++++++++++------ + .../transforms/StablehloAggressiveFolder.cpp | 9 +++++++ + 2 files changed, 28 insertions(+), 8 deletions(-) + +diff --git a/stablehlo/tests/transforms/stablehlo_aggressive_folder.mlir b/stablehlo/tests/transforms/stablehlo_aggressive_folder.mlir +index 5b21a10d..c90c89c6 100644 +--- a/stablehlo/tests/transforms/stablehlo_aggressive_folder.mlir ++++ b/stablehlo/tests/transforms/stablehlo_aggressive_folder.mlir +@@ -4,14 +4,17 @@ + // AddOp + + // CHECK-LABEL: @add_fold_cst +-func.func @add_fold_cst() -> (tensor, tensor) { ++func.func @add_fold_cst() -> (tensor, tensor, tensor) { + %cst = stablehlo.constant dense<1> : tensor + %cst_1 = stablehlo.constant dense<1.0> : tensor ++ %cst_2 = stablehlo.constant dense<2.0> : tensor<1xf32> + // CHECK: stablehlo.constant dense<2> : tensor + // CHECK: stablehlo.constant dense<2.0{{.*}}> : tensor ++ // CHECK: stablehlo.add + %0 = stablehlo.add %cst, %cst : tensor + %1 = stablehlo.add %cst_1, %cst_1 : tensor +- return %0, %1 : tensor, tensor ++ %2 = stablehlo.add %cst_2, %cst_2 : (tensor<1xf32>, tensor<1xf32>) -> tensor ++ return %0, %1, %2 : tensor, tensor, tensor + } + + // ----- +@@ -106,14 +109,17 @@ func.func @concatenate_fold() -> (tensor<6xi32>, tensor<3xi32>, tensor<3x3xi32>, + // MulOp + + // CHECK-LABEL: @mul_fold_cst +-func.func @mul_fold_cst() -> (tensor, tensor) { ++func.func @mul_fold_cst() -> (tensor, tensor, tensor) { + %cst = stablehlo.constant dense<2> : tensor + %cst_1 = stablehlo.constant dense<2.0> : tensor ++ %cst_2 = stablehlo.constant dense<2.0> : tensor<1xf32> + // CHECK: stablehlo.constant dense<4> : tensor + // CHECK: stablehlo.constant dense<4.0{{.*}}> : tensor ++ // CHECK: stablehlo.multiply + %0 = stablehlo.multiply %cst, %cst : tensor + %1 = stablehlo.multiply %cst_1, %cst_1 : tensor +- return %0, %1 : tensor, tensor ++ %2 = stablehlo.multiply %cst_2, %cst_2 : (tensor<1xf32>, tensor<1xf32>) -> tensor ++ return %0, %1, %2 : tensor, tensor, tensor + } + + // ----- +@@ -122,16 +128,21 @@ func.func @mul_fold_cst() -> (tensor, tensor) { + // SubtractOp + + // CHECK-LABEL: @subtract_fold_cst +-func.func @subtract_fold_cst() -> (tensor, tensor) { ++func.func @subtract_fold_cst() -> (tensor, tensor, tensor) { + %cst = stablehlo.constant dense<1> : tensor + %cst_1 = stablehlo.constant dense<3> : tensor + %cst_2 = stablehlo.constant dense<1.0> : tensor + %cst_3 = stablehlo.constant dense<3.0> : tensor +- // CHECK: stablehlo.constant dense<2> : tensor +- // CHECK: stablehlo.constant dense<2.0{{.*}}> : tensor ++ %cst_4 = stablehlo.constant dense<4.0> : tensor<1xf32> ++ %cst_5 = stablehlo.constant dense<5.0> : tensor<1xf32> ++ // CHECK: %[[V1:.+]] = stablehlo.constant dense<2> : tensor ++ // CHECK: %[[V2:.+]] = stablehlo.constant dense<2.0{{.*}}> : tensor ++ // CHECK: %[[V3:.+]] = stablehlo.subtract ++ // CHECK: return %[[V1]], %[[V2]], %[[V3]] + %0 = stablehlo.subtract %cst_1, %cst : tensor + %1 = stablehlo.subtract %cst_3, %cst_2 : tensor +- return %0, %1 : tensor, tensor ++ %2 = stablehlo.subtract %cst_4, %cst_5 : (tensor<1xf32>, tensor<1xf32>) -> tensor ++ return %0, %1, %2 : tensor, tensor, tensor + } + + // ----- +diff --git a/stablehlo/transforms/StablehloAggressiveFolder.cpp b/stablehlo/transforms/StablehloAggressiveFolder.cpp +index a9107514..dadc14fb 100644 +--- a/stablehlo/transforms/StablehloAggressiveFolder.cpp ++++ b/stablehlo/transforms/StablehloAggressiveFolder.cpp +@@ -257,6 +257,9 @@ struct FoldAddOpPattern final : OpRewritePattern { + + LogicalResult matchAndRewrite(mlir::stablehlo::AddOp op, + PatternRewriter& rewriter) const override { ++ if (failed(validateResultTypeForEval(rewriter, op, op.getType()))) ++ return failure(); ++ + Value lhs = op.getLhs(); + Value rhs = op.getRhs(); + +@@ -548,6 +551,9 @@ struct FoldMulOpPattern final : OpRewritePattern { + + LogicalResult matchAndRewrite(mlir::stablehlo::MulOp op, + PatternRewriter& rewriter) const override { ++ if (failed(validateResultTypeForEval(rewriter, op, op.getType()))) ++ return failure(); ++ + auto elemType = op.getType().getElementType(); + Value lhs = op.getLhs(); + Value rhs = op.getRhs(); +@@ -747,6 +753,9 @@ struct FoldSubtractOpPattern final + + LogicalResult matchAndRewrite(mlir::stablehlo::SubtractOp op, + PatternRewriter& rewriter) const override { ++ if (failed(validateResultTypeForEval(rewriter, op, op.getType()))) ++ return failure(); ++ + Value lhs = op.getLhs(); + Value rhs = op.getRhs(); + +-- +2.47.0 + diff --git a/mlir-tensorrt/build_tools/scripts/cicd_build.sh b/mlir-tensorrt/build_tools/scripts/cicd_build.sh index 69747be6c..c0cf621f1 100755 --- a/mlir-tensorrt/build_tools/scripts/cicd_build.sh +++ b/mlir-tensorrt/build_tools/scripts/cicd_build.sh @@ -3,29 +3,25 @@ set -ex set -o pipefail REPO_ROOT=$(pwd) -BUILD_DIR="${BUILD_DIR:=${REPO_ROOT}/build}" - -ENABLE_NCCL=${ENABLE_NCCL:OFF} -RUN_LONG_TESTS=${RUN_LONG_TESTS:-False} +BUILD_DIR="${REPO_ROOT}/build" LLVM_LIT_ARGS=${LLVM_LIT_ARGS:-"-v --xunit-xml-output ${BUILD_DIR}/test-results.xml --timeout=1200 --time-tests -Drun_long_tests=${RUN_LONG_TESTS}"} -DOWNLOAD_TENSORRT_VERSION=${DOWNLOAD_TENSORRT_VERSION:-10.0.0.6} +DOWNLOAD_TENSORRT_VERSION=${DOWNLOAD_TENSORRT_VERSION:-10.5} ENABLE_ASAN=${ENABLE_ASAN:-OFF} +export CCACHE_DIR="/ccache" + +ccache --zero-stats || true +rm -rf ${BUILD_DIR} || true + echo "Using DOWNLOAD_TENSORRT_VERSION=${DOWNLOAD_TENSORRT_VERSION}" echo "Using LLVM_LIT_ARGS=${LLVM_LIT_ARGS}" -cmake -GNinja -B "${BUILD_DIR}" -S "${REPO_ROOT}" \ - -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ - -DMLIR_TRT_USE_LINKER=lld -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DMLIR_TRT_PACKAGE_CACHE_DIR=$PWD/.cache.cpm \ - -DMLIR_TRT_ENABLE_PYTHON=ON \ - -DMLIR_TRT_ENABLE_NCCL=${ENABLE_NCCL} \ - -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION="$DOWNLOAD_TENSORRT_VERSION" \ - -DLLVM_LIT_ARGS="${LLVM_LIT_ARGS}" \ +cmake --preset ninja-llvm \ + -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION="${DOWNLOAD_TENSORRT_VERSION}" \ -DENABLE_ASAN="${ENABLE_ASAN}" \ - -DCMAKE_PLATFORM_NO_VERSIONED_SONAME=ON + -DLLVM_LIT_ARGS="${LLVM_LIT_ARGS}" \ + -DCPM_SOURCE_CACHE="/.cache.cpm" \ + -DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF -echo "==== Running Build ===" -ninja -C ${BUILD_DIR} -k 0 check-mlir-executor -ninja -C ${BUILD_DIR} -k 0 check-mlir-tensorrt -ninja -C ${BUILD_DIR} -k 0 check-mlir-tensorrt-dialect +ninja -C ${BUILD_DIR} -k 0 check-all-mlir-tensorrt +ccache --show-stats || true diff --git a/mlir-tensorrt/compiler/lib/Conversion/TensorRTToTensorRTRuntime/CMakeLists.txt b/mlir-tensorrt/compiler/lib/Conversion/TensorRTToTensorRTRuntime/CMakeLists.txt index 050c6de8c..016d30a66 100644 --- a/mlir-tensorrt/compiler/lib/Conversion/TensorRTToTensorRTRuntime/CMakeLists.txt +++ b/mlir-tensorrt/compiler/lib/Conversion/TensorRTToTensorRTRuntime/CMakeLists.txt @@ -1,7 +1,7 @@ add_mlir_tensorrt_library(MLIRTensorRTTensorRTToTensorRTRuntime TensorRTToTensorRTRuntime.cpp - LINK_LIBS + LINK_LIBS PUBLIC MLIRTensorRTExecutorUtils MLIRTensorRTDialect MLIRTensorRTTensorRTRuntimeDialect diff --git a/mlir-tensorrt/test/Dialect/Plan/segmentation-pipeline.mlir b/mlir-tensorrt/test/Dialect/Plan/segmentation-pipeline.mlir index 577de6208..66e4e79b7 100644 --- a/mlir-tensorrt/test/Dialect/Plan/segmentation-pipeline.mlir +++ b/mlir-tensorrt/test/Dialect/Plan/segmentation-pipeline.mlir @@ -224,4 +224,24 @@ builtin.module @simple_gather_dynamic attributes { // CHECK-DAG: %[[v1:.+]] = stablehlo.reshape %[[v0]] : (tensor) -> tensor<1xi32> // CHECK-DAG: %[[v2:.+]] = stablehlo.concatenate %[[c]], %[[v1]], %[[c_0]], %[[c_0]] // CHECK-DAG: %[[v3:.+]] = "stablehlo.dynamic_gather"(%[[arg1]], %[[arg0]], %[[v2]]) -// CHECK-DAG: return %[[v3]] : tensor \ No newline at end of file +// CHECK-DAG: return %[[v3]] : tensor + +// ----- + +builtin.module attributes { + plan.cluster_kinds = [ + #plan.tensorrt_cluster, + #plan.host_cluster + ] +} { + func.func @static_type_refinement() -> tensor{ + %c_0 = stablehlo.constant dense<1> : tensor<1x1xi32> + %c_1 = stablehlo.constant dense<2> : tensor<1x1xi32> + %0 = stablehlo.subtract %c_0, %c_1 : (tensor<1x1xi32>, tensor<1x1xi32>) -> tensor + return %0 : tensor + } +} + +// CHECK-LABEL: func.func @static_type_refinement() -> tensor<1x1xi32> +// CHECK-LABEL: tensorrt.module +// CHECK: stablehlo.subtract {{.*}} : tensor<1x1xi32> diff --git a/mlir-tensorrt/test/python/mlir_tensorrt_compiler/compiler_api/test_options_context.py b/mlir-tensorrt/test/python/mlir_tensorrt_compiler/compiler_api/test_options_context.py index d74198311..95b9c591a 100644 --- a/mlir-tensorrt/test/python/mlir_tensorrt_compiler/compiler_api/test_options_context.py +++ b/mlir-tensorrt/test/python/mlir_tensorrt_compiler/compiler_api/test_options_context.py @@ -27,4 +27,4 @@ # CHECK: InvalidArgument: InvalidArgument: non-existent-options-type is not a valid option type. Valid options were: stable-hlo-to-executable -# CHECK: --tensorrt-timing-cache-path= --device-infer-from-host=true --debug-only= --executor-index-bitwidth=64 --entrypoint=main --plan-clustering-disallow-host-tensors-in-tensorrt-clusters=false --tensorrt-workspace-memory-pool-limit=1073741824 --device-max-registers-per-block=65536 --tensorrt-strongly-typed=false --tensorrt-layer-info-dir= --device-compute-capability=86 --debug=false --mlir-print-ir-tree-dir= --disable-tensorrt-extension=false --tensorrt-builder-opt-level=3 --tensorrt-engines-dir= +# CHECK: Options[{{.*}}]