From fb859688c64930b8e70fd3a41ab4160b9a77ab31 Mon Sep 17 00:00:00 2001
From: Christopher Bate <cbate@nvidia.com>
Date: Wed, 4 Dec 2024 22:41:00 -0700
Subject: [PATCH] [mlir-tensorrt] Add additional missing StableHLO patch (#425)

Added a patch that was missing from the last StableHLO upgrade. This patch
addresses some issues mentioned in
https://github.com/openxla/stablehlo/issues/2634.

An additional test is added to mlir-tensorrt as a regression test.
---
 .github/workflows/mlir-tensorrt-ci.yml        | 289 +-----------------
 mlir-tensorrt/CMakeLists.txt                  |   1 +
 ...sing-checks-for-static-shapes-in-sta.patch | 118 +++++++
 .../build_tools/scripts/cicd_build.sh         |  32 +-
 .../TensorRTToTensorRTRuntime/CMakeLists.txt  |   2 +-
 .../Dialect/Plan/segmentation-pipeline.mlir   |  22 +-
 .../compiler_api/test_options_context.py      |   2 +-
 7 files changed, 168 insertions(+), 298 deletions(-)
 create mode 100644 mlir-tensorrt/build_tools/patches/stablehlo/0002-Fix-a-couple-missing-checks-for-static-shapes-in-sta.patch

diff --git a/.github/workflows/mlir-tensorrt-ci.yml b/.github/workflows/mlir-tensorrt-ci.yml
index e9ff1e611..3b6a1ad4a 100644
--- a/.github/workflows/mlir-tensorrt-ci.yml
+++ b/.github/workflows/mlir-tensorrt-ci.yml
@@ -17,7 +17,7 @@ env:
 
 jobs:
   mlir-tensorrt-test-pr:
-    if: github.event_name == 'pull_request' && github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || github.event.pull_request.draft == false
     # `ubuntu-latest` is a CPU runner.
     # If selected, tests requiring GPU are not run.
     runs-on: ubuntu-latest
@@ -110,320 +110,55 @@ jobs:
       - name: Create cache folder
         run: |
           mkdir -p ${{ github.workspace }}/ccache
-          mkdir -p ${{ github.workspace }}/.ccache.cpm
+          mkdir -p ${{ github.workspace }}/.cache.cpm
 
       # Create cache action
       - name: Create cache action
         id: core-build-cache
         uses: actions/cache@v4
         with:
-          key: ${{ runner.os }}-mlir-tensorrt-core-build
+          key: ${{ runner.os }}-mlir-tensorrt-cache-${{ hashFiles('mlir-tensorrt/**/*.cpp', 'mlir-tensorrt/**/*.h', 'mlir-tensorrt/build_tools/**/*') }}
+          restore-keys: |
+            ${{ runner.os }}-mlir-tensorrt-cache-
           path: |
             ${{ github.workspace }}/ccache
-            ${{ github.workspace }}/.ccache.cpm
+            ${{ github.workspace }}/.cache.cpm
 
       # Run LIT tests with TensorRT 10
       - name: Run MLIR-TensorRT lit tests with TensorRT 10
         uses: addnab/docker-run-action@v3
         with:
           image: ${{ env.DEFAULT_IMAGE }}
-          options: -v ${{ github.workspace }}/mlir-tensorrt:/mlir-tensorrt -v ${{ github.workspace }}/ccache:/ccache -v ${{ github.workspace }}/.ccache.cpm:/.ccache.cpm
+          options: -v ${{ github.workspace }}/mlir-tensorrt:/mlir-tensorrt -v ${{ github.workspace }}/ccache:/ccache -v ${{ github.workspace }}/.cache.cpm:/.cache.cpm
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
           run: |
-            export CCACHE_DIR="/ccache"
-            export CCACHE_MAXSIZE=10G
-            ccache --zero-stats || true
-            ccache --show-stats || true
-
-            cd mlir-tensorrt
-            cat > build_and_test.sh <<EOF
-            #!/bin/bash
-            set -e
-
-            python3 -m pip install -r python/requirements-dev.txt
-
-            cmake -B ./build -S . -G Ninja \
-              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-              -DMLIR_TRT_PACKAGE_CACHE_DIR=/.cache.cpm \
-              -DMLIR_TRT_ENABLE_ASSERTIONS=ON \
-              -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=10.5 \
-              -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-              -DMLIR_TRT_USE_LINKER=lld \
-              -DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF
-
-            ninja -C build all
-
-            ninja -C build check-mlir-executor
-            ninja -C build check-mlir-tensorrt-dialect
-            ninja -C build check-mlir-tensorrt
-
-            cd ..
-            ccache --show-stats || true
-            EOF
-
-            bash build_and_test.sh
-
-      # Run LIT tests with TensorRT 10 & ASAN
-      - name: Run MLIR-TensorRT lit tests with TensorRT 10, ASAN enabled
-        uses: addnab/docker-run-action@v3
-        with:
-          image: ${{ env.DEFAULT_IMAGE }}
-          options: -v ${{ github.workspace }}/mlir-tensorrt:/mlir-tensorrt -v ${{ github.workspace }}/ccache:/ccache -v ${{ github.workspace }}/.ccache.cpm:/.ccache.cpm
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-          run: |
-            export CCACHE_DIR="/ccache"
-            export CCACHE_MAXSIZE=10G
-            ccache --zero-stats || true
-            ccache --show-stats || true
-
-            cd mlir-tensorrt
-            cat > build_and_test.sh <<EOF
-            #!/bin/bash
-            set -e
-
-            python3 -m pip install -r python/requirements-dev.txt
-
-            cmake -B ./build -S . -G Ninja \
-              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-              -DMLIR_TRT_PACKAGE_CACHE_DIR=/.cache.cpm \
-              -DMLIR_TRT_ENABLE_ASSERTIONS=ON \
-              -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=10.5 \
-              -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-              -DMLIR_TRT_USE_LINKER=lld \
-              -DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF \
-              -DENABLE_ASAN=ON
-
-            ninja -C build all
-
-            ninja -C build check-mlir-executor
-            ninja -C build check-mlir-tensorrt-dialect
-            ninja -C build check-mlir-tensorrt
-
-            cd ..
-            ccache --show-stats || true
-            EOF
-
-            bash build_and_test.sh
-
-      # Run LIT tests with TensorRT 8
-      - name: Run MLIR-TensorRT lit tests with TensorRT 8
-        uses: addnab/docker-run-action@v3
-        with:
-          image: ${{ env.DEFAULT_IMAGE }}
-          options: -v ${{ github.workspace }}/mlir-tensorrt:/mlir-tensorrt -v ${{ github.workspace }}/ccache:/ccache -v ${{ github.workspace }}/.ccache.cpm:/.ccache.cpm
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-          run: |
-            export CCACHE_DIR="/ccache"
-            export CCACHE_MAXSIZE=10G
-            ccache --zero-stats || true
-            ccache --show-stats || true
-
-            cd mlir-tensorrt
-            cat > build_and_test.sh <<EOF
-            #!/bin/bash
-            set -e
-
-            python3 -m pip install -r python/requirements-dev.txt
-
-            cmake -B ./build -S . -G Ninja \
-              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-              -DMLIR_TRT_PACKAGE_CACHE_DIR=/.cache.cpm \
-              -DMLIR_TRT_ENABLE_ASSERTIONS=ON \
-              -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=8.6.1.6 \
-              -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-              -DMLIR_TRT_USE_LINKER=lld \
-              -DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF
-
-            ninja -C build all
-
-            ninja -C build check-mlir-executor
-            ninja -C build check-mlir-tensorrt-dialect
-            ninja -C build check-mlir-tensorrt
-
-            cd ..
-            ccache --show-stats || true
-            EOF
-
-            bash build_and_test.sh
-
-  mlir-tensorrt-test-main:
-    if: github.event_name == 'push'
-    # `ubuntu-latest` is a CPU runner.
-    # If selected, tests requiring GPU are not run.
-    runs-on: ubuntu-latest
-
-    steps:
-      # Free some disk space, otherwise we get OOM error.
-      - name: Free disk space
-        run: |
-          sudo rm -rf \
-            /usr/share/dotnet "$AGENT_TOOLSDIRECTORY" /usr/local/lib/android /opt/ghc \
-            /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \
-            /usr/lib/jvm
-
-          sudo apt-get purge microsoft-edge-stable || true
-          sudo apt-get purge google-cloud-cli || true
-          sudo apt-get purge dotnet-sdk-* || true
-          sudo apt-get purge google-chrome-stable || true
-
-          sudo apt-get autoremove -y
-          sudo apt-get autoclean -y
-
-      # Value of `github.workspace` is /home/runner/work/{repo_name}/{repo-name}
-      # i.e. /home/runner/work/TensorRT-Incubator/TensorRT-Incubator in our case.
-      # After this action, repo is cloned inside above path.
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 5
-
-      # Create cache folders
-      - name: Create cache folder
-        run: |
-          mkdir -p ${{ github.workspace }}/ccache
-          mkdir -p ${{ github.workspace }}/.ccache.cpm
-
-      # Create cache action
-      - name: Create cache action
-        id: core-build-cache
-        uses: actions/cache@v4
-        with:
-          key: ${{ runner.os }}-mlir-tensorrt-core-build
-          path: |
-            ${{ github.workspace }}/ccache
-            ${{ github.workspace }}/.ccache.cpm
-
-      # Run LIT tests with TensorRT 10
-      - name: Run MLIR-TensorRT lit tests with TensorRT 10
-        uses: addnab/docker-run-action@v3
-        with:
-          image: ${{ env.DEFAULT_IMAGE }}
-          options: -v ${{ github.workspace }}/mlir-tensorrt:/mlir-tensorrt -v ${{ github.workspace }}/ccache:/ccache -v ${{ github.workspace }}/.ccache.cpm:/.ccache.cpm
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-          run: |
-            export CCACHE_DIR="/ccache"
-            export CCACHE_MAXSIZE=10G
-            ccache --zero-stats || true
-            ccache --show-stats || true
-
             cd mlir-tensorrt
-            cat > build_and_test.sh <<EOF
-            #!/bin/bash
-            set -e
-
-            python3 -m pip install -r python/requirements-dev.txt
-
-            cmake -B ./build -S . -G Ninja \
-              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-              -DMLIR_TRT_PACKAGE_CACHE_DIR=/.cache.cpm \
-              -DMLIR_TRT_ENABLE_ASSERTIONS=ON \
-              -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=10.5 \
-              -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-              -DMLIR_TRT_USE_LINKER=lld \
-              -DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF
-
-            ninja -C build all
-
-            ninja -C build check-mlir-executor
-            ninja -C build check-mlir-tensorrt-dialect
-            ninja -C build check-mlir-tensorrt
-
-            cd ..
-            ccache --show-stats || true
-            EOF
-
-            bash build_and_test.sh
+            ./build_tools/scripts/cicd_build.sh
 
       # Run LIT tests with TensorRT 10 & ASAN
       - name: Run MLIR-TensorRT lit tests with TensorRT 10, ASAN enabled
         uses: addnab/docker-run-action@v3
         with:
           image: ${{ env.DEFAULT_IMAGE }}
-          options: -v ${{ github.workspace }}/mlir-tensorrt:/mlir-tensorrt -v ${{ github.workspace }}/ccache:/ccache -v ${{ github.workspace }}/.ccache.cpm:/.ccache.cpm
+          options: -v ${{ github.workspace }}/mlir-tensorrt:/mlir-tensorrt -v ${{ github.workspace }}/ccache:/ccache -v ${{ github.workspace }}/.cache.cpm:/.cache.cpm
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
           run: |
-            export CCACHE_DIR="/ccache"
-            export CCACHE_MAXSIZE=10G
-            ccache --zero-stats || true
-            ccache --show-stats || true
-
             cd mlir-tensorrt
-            cat > build_and_test.sh <<EOF
-            #!/bin/bash
-            set -e
-
-            python3 -m pip install -r python/requirements-dev.txt
-
-            cmake -B ./build -S . -G Ninja \
-              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-              -DMLIR_TRT_PACKAGE_CACHE_DIR=/.cache.cpm \
-              -DMLIR_TRT_ENABLE_ASSERTIONS=ON \
-              -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=10.5 \
-              -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-              -DMLIR_TRT_USE_LINKER=lld \
-              -DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF \
-              -DENABLE_ASAN=ON
-
-            ninja -C build all
-
-            ninja -C build check-mlir-executor
-            ninja -C build check-mlir-tensorrt-dialect
-            ninja -C build check-mlir-tensorrt
-
-            cd ..
-            ccache --show-stats || true
-            EOF
-
-            bash build_and_test.sh
+            ENABLE_ASAN=ON ./build_tools/scripts/cicd_build.sh
 
       # Run LIT tests with TensorRT 8
       - name: Run MLIR-TensorRT lit tests with TensorRT 8
         uses: addnab/docker-run-action@v3
         with:
           image: ${{ env.DEFAULT_IMAGE }}
-          options: -v ${{ github.workspace }}/mlir-tensorrt:/mlir-tensorrt -v ${{ github.workspace }}/ccache:/ccache -v ${{ github.workspace }}/.ccache.cpm:/.ccache.cpm
+          options: -v ${{ github.workspace }}/mlir-tensorrt:/mlir-tensorrt -v ${{ github.workspace }}/ccache:/ccache -v ${{ github.workspace }}/.cache.cpm:/.cache.cpm
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
           run: |
-            export CCACHE_DIR="/ccache"
-            export CCACHE_MAXSIZE=10G
-            ccache --zero-stats || true
-            ccache --show-stats || true
-
             cd mlir-tensorrt
-            cat > build_and_test.sh <<EOF
-            #!/bin/bash
-            set -e
-
-            python3 -m pip install -r python/requirements-dev.txt
-
-            cmake -B ./build -S . -G Ninja \
-              -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-              -DMLIR_TRT_PACKAGE_CACHE_DIR=/.cache.cpm \
-              -DMLIR_TRT_ENABLE_ASSERTIONS=ON \
-              -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION=8.6.1.6 \
-              -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-              -DMLIR_TRT_USE_LINKER=lld \
-              -DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF
-
-            ninja -C build all
-
-            ninja -C build check-mlir-executor
-            ninja -C build check-mlir-tensorrt-dialect
-            ninja -C build check-mlir-tensorrt
-
-            cd ..
-            ccache --show-stats || true
-            EOF
-
-            bash build_and_test.sh
+            DOWNLOAD_TENSORRT_VERSION="8.6.1.6" ./build_tools/scripts/cicd_build.sh
diff --git a/mlir-tensorrt/CMakeLists.txt b/mlir-tensorrt/CMakeLists.txt
index 7a36a7887..edd9512ea 100644
--- a/mlir-tensorrt/CMakeLists.txt
+++ b/mlir-tensorrt/CMakeLists.txt
@@ -186,6 +186,7 @@ if(MLIR_TRT_ENABLE_HLO AND NOT TARGET StablehloOps)
     GIT_REPOSITORY "https://github.com/openxla/stablehlo.git"
     PATCHES
       "${CMAKE_CURRENT_LIST_DIR}/build_tools/patches/stablehlo/0001-transforms-Fix-simplification-patterns-for-stablehlo.patch"
+      "${CMAKE_CURRENT_LIST_DIR}/build_tools/patches/stablehlo/0002-Fix-a-couple-missing-checks-for-static-shapes-in-sta.patch"
     OPTIONS
       "STABLEHLO_ENABLE_BINDINGS_PYTHON ${MLIR_TRT_ENABLE_PYTHON}"
       "STABLEHLO_BUILD_EMBEDDED ON"
diff --git a/mlir-tensorrt/build_tools/patches/stablehlo/0002-Fix-a-couple-missing-checks-for-static-shapes-in-sta.patch b/mlir-tensorrt/build_tools/patches/stablehlo/0002-Fix-a-couple-missing-checks-for-static-shapes-in-sta.patch
new file mode 100644
index 000000000..767fe3133
--- /dev/null
+++ b/mlir-tensorrt/build_tools/patches/stablehlo/0002-Fix-a-couple-missing-checks-for-static-shapes-in-sta.patch
@@ -0,0 +1,118 @@
+From fb0378d09cebb74da6ca253f6b41241a26bab43e Mon Sep 17 00:00:00 2001
+From: Christopher Bate <cbate@nvidia.com>
+Date: Wed, 27 Nov 2024 00:10:11 +0000
+Subject: [PATCH] Fix a couple missing checks for static shapes in
+ `stablehlo-aggressive-folder`
+
+---
+ .../stablehlo_aggressive_folder.mlir          | 27 +++++++++++++------
+ .../transforms/StablehloAggressiveFolder.cpp  |  9 +++++++
+ 2 files changed, 28 insertions(+), 8 deletions(-)
+
+diff --git a/stablehlo/tests/transforms/stablehlo_aggressive_folder.mlir b/stablehlo/tests/transforms/stablehlo_aggressive_folder.mlir
+index 5b21a10d..c90c89c6 100644
+--- a/stablehlo/tests/transforms/stablehlo_aggressive_folder.mlir
++++ b/stablehlo/tests/transforms/stablehlo_aggressive_folder.mlir
+@@ -4,14 +4,17 @@
+ // AddOp
+ 
+ // CHECK-LABEL: @add_fold_cst
+-func.func @add_fold_cst() -> (tensor<i32>, tensor<f32>) {
++func.func @add_fold_cst() -> (tensor<i32>, tensor<f32>, tensor<?xf32>) {
+   %cst = stablehlo.constant dense<1> : tensor<i32>
+   %cst_1 = stablehlo.constant dense<1.0> : tensor<f32>
++  %cst_2 = stablehlo.constant dense<2.0> : tensor<1xf32>
+   // CHECK: stablehlo.constant dense<2> : tensor<i32>
+   // CHECK: stablehlo.constant dense<2.0{{.*}}> : tensor<f32>
++  // CHECK: stablehlo.add
+   %0 = stablehlo.add %cst, %cst : tensor<i32>
+   %1 = stablehlo.add %cst_1, %cst_1 : tensor<f32>
+-  return %0, %1 : tensor<i32>, tensor<f32>
++  %2 = stablehlo.add %cst_2, %cst_2 : (tensor<1xf32>, tensor<1xf32>) ->  tensor<?xf32>
++  return %0, %1, %2 : tensor<i32>, tensor<f32>, tensor<?xf32>
+ }
+ 
+ // -----
+@@ -106,14 +109,17 @@ func.func @concatenate_fold() -> (tensor<6xi32>, tensor<3xi32>, tensor<3x3xi32>,
+ // MulOp
+ 
+ // CHECK-LABEL: @mul_fold_cst
+-func.func @mul_fold_cst() -> (tensor<i32>, tensor<f32>) {
++func.func @mul_fold_cst() -> (tensor<i32>, tensor<f32>, tensor<?xf32>) {
+   %cst = stablehlo.constant dense<2> : tensor<i32>
+   %cst_1 = stablehlo.constant dense<2.0> : tensor<f32>
++  %cst_2 = stablehlo.constant dense<2.0> : tensor<1xf32>
+   // CHECK: stablehlo.constant dense<4> : tensor<i32>
+   // CHECK: stablehlo.constant dense<4.0{{.*}}> : tensor<f32>
++  // CHECK: stablehlo.multiply
+   %0 = stablehlo.multiply %cst, %cst : tensor<i32>
+   %1 = stablehlo.multiply %cst_1, %cst_1 : tensor<f32>
+-  return %0, %1 : tensor<i32>, tensor<f32>
++  %2 = stablehlo.multiply %cst_2, %cst_2 : (tensor<1xf32>, tensor<1xf32>) -> tensor<?xf32>
++  return %0, %1, %2 : tensor<i32>, tensor<f32>, tensor<?xf32>
+ }
+ 
+ // -----
+@@ -122,16 +128,21 @@ func.func @mul_fold_cst() -> (tensor<i32>, tensor<f32>) {
+ // SubtractOp
+ 
+ // CHECK-LABEL: @subtract_fold_cst
+-func.func @subtract_fold_cst() -> (tensor<i32>, tensor<f32>) {
++func.func @subtract_fold_cst() -> (tensor<i32>, tensor<f32>, tensor<?xf32>) {
+   %cst = stablehlo.constant dense<1> : tensor<i32>
+   %cst_1 = stablehlo.constant dense<3> : tensor<i32>
+   %cst_2 = stablehlo.constant dense<1.0> : tensor<f32>
+   %cst_3 = stablehlo.constant dense<3.0> : tensor<f32>
+-  // CHECK: stablehlo.constant dense<2> : tensor<i32>
+-  // CHECK: stablehlo.constant dense<2.0{{.*}}> : tensor<f32>
++  %cst_4 = stablehlo.constant dense<4.0> : tensor<1xf32>
++  %cst_5 = stablehlo.constant dense<5.0> : tensor<1xf32>
++  // CHECK: %[[V1:.+]] = stablehlo.constant dense<2> : tensor<i32>
++  // CHECK: %[[V2:.+]] = stablehlo.constant dense<2.0{{.*}}> : tensor<f32>
++  // CHECK: %[[V3:.+]] = stablehlo.subtract
++  // CHECK: return %[[V1]], %[[V2]], %[[V3]]
+   %0 = stablehlo.subtract %cst_1, %cst : tensor<i32>
+   %1 = stablehlo.subtract %cst_3, %cst_2 : tensor<f32>
+-  return %0, %1 : tensor<i32>, tensor<f32>
++  %2 = stablehlo.subtract %cst_4, %cst_5 : (tensor<1xf32>, tensor<1xf32>) -> tensor<?xf32>
++  return %0, %1, %2 : tensor<i32>, tensor<f32>, tensor<?xf32>
+ }
+ 
+ // -----
+diff --git a/stablehlo/transforms/StablehloAggressiveFolder.cpp b/stablehlo/transforms/StablehloAggressiveFolder.cpp
+index a9107514..dadc14fb 100644
+--- a/stablehlo/transforms/StablehloAggressiveFolder.cpp
++++ b/stablehlo/transforms/StablehloAggressiveFolder.cpp
+@@ -257,6 +257,9 @@ struct FoldAddOpPattern final : OpRewritePattern<mlir::stablehlo::AddOp> {
+ 
+   LogicalResult matchAndRewrite(mlir::stablehlo::AddOp op,
+                                 PatternRewriter& rewriter) const override {
++    if (failed(validateResultTypeForEval(rewriter, op, op.getType())))
++      return failure();
++
+     Value lhs = op.getLhs();
+     Value rhs = op.getRhs();
+ 
+@@ -548,6 +551,9 @@ struct FoldMulOpPattern final : OpRewritePattern<mlir::stablehlo::MulOp> {
+ 
+   LogicalResult matchAndRewrite(mlir::stablehlo::MulOp op,
+                                 PatternRewriter& rewriter) const override {
++    if (failed(validateResultTypeForEval(rewriter, op, op.getType())))
++      return failure();
++
+     auto elemType = op.getType().getElementType();
+     Value lhs = op.getLhs();
+     Value rhs = op.getRhs();
+@@ -747,6 +753,9 @@ struct FoldSubtractOpPattern final
+ 
+   LogicalResult matchAndRewrite(mlir::stablehlo::SubtractOp op,
+                                 PatternRewriter& rewriter) const override {
++    if (failed(validateResultTypeForEval(rewriter, op, op.getType())))
++      return failure();
++
+     Value lhs = op.getLhs();
+     Value rhs = op.getRhs();
+ 
+-- 
+2.47.0
+
diff --git a/mlir-tensorrt/build_tools/scripts/cicd_build.sh b/mlir-tensorrt/build_tools/scripts/cicd_build.sh
index 69747be6c..c0cf621f1 100755
--- a/mlir-tensorrt/build_tools/scripts/cicd_build.sh
+++ b/mlir-tensorrt/build_tools/scripts/cicd_build.sh
@@ -3,29 +3,27 @@ set -ex
 set -o pipefail
 
 REPO_ROOT=$(pwd)
-BUILD_DIR="${BUILD_DIR:=${REPO_ROOT}/build}"
-
-ENABLE_NCCL=${ENABLE_NCCL:OFF}
+BUILD_DIR="${REPO_ROOT}/build"
+# The default LLVM_LIT_ARGS interpolates RUN_LONG_TESTS; keep its default.
 RUN_LONG_TESTS=${RUN_LONG_TESTS:-False}
 LLVM_LIT_ARGS=${LLVM_LIT_ARGS:-"-v --xunit-xml-output ${BUILD_DIR}/test-results.xml --timeout=1200 --time-tests -Drun_long_tests=${RUN_LONG_TESTS}"}
-DOWNLOAD_TENSORRT_VERSION=${DOWNLOAD_TENSORRT_VERSION:-10.0.0.6}
+DOWNLOAD_TENSORRT_VERSION=${DOWNLOAD_TENSORRT_VERSION:-10.5}
 ENABLE_ASAN=${ENABLE_ASAN:-OFF}
 
+export CCACHE_DIR="/ccache"
+
+ccache --zero-stats || true
+rm -rf "${BUILD_DIR}" || true
+
 echo "Using DOWNLOAD_TENSORRT_VERSION=${DOWNLOAD_TENSORRT_VERSION}"
 echo "Using LLVM_LIT_ARGS=${LLVM_LIT_ARGS}"
 
-cmake -GNinja -B "${BUILD_DIR}" -S "${REPO_ROOT}" \
-  -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
-  -DMLIR_TRT_USE_LINKER=lld -DCMAKE_BUILD_TYPE=RelWithDebInfo \
-  -DMLIR_TRT_PACKAGE_CACHE_DIR=$PWD/.cache.cpm \
-  -DMLIR_TRT_ENABLE_PYTHON=ON \
-  -DMLIR_TRT_ENABLE_NCCL=${ENABLE_NCCL} \
-  -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION="$DOWNLOAD_TENSORRT_VERSION" \
-  -DLLVM_LIT_ARGS="${LLVM_LIT_ARGS}" \
+cmake --preset ninja-llvm \
+  -DMLIR_TRT_DOWNLOAD_TENSORRT_VERSION="${DOWNLOAD_TENSORRT_VERSION}" \
   -DENABLE_ASAN="${ENABLE_ASAN}" \
-  -DCMAKE_PLATFORM_NO_VERSIONED_SONAME=ON
+  -DLLVM_LIT_ARGS="${LLVM_LIT_ARGS}" \
+  -DCPM_SOURCE_CACHE="/.cache.cpm" \
+  -DMLIR_EXECUTOR_ENABLE_GPU_INTEGRATION_TESTS=OFF
 
-echo "==== Running Build ==="
-ninja -C ${BUILD_DIR} -k 0 check-mlir-executor
-ninja -C ${BUILD_DIR} -k 0 check-mlir-tensorrt
-ninja -C ${BUILD_DIR} -k 0 check-mlir-tensorrt-dialect
+ninja -C "${BUILD_DIR}" -k 0 check-all-mlir-tensorrt
+ccache --show-stats || true
diff --git a/mlir-tensorrt/compiler/lib/Conversion/TensorRTToTensorRTRuntime/CMakeLists.txt b/mlir-tensorrt/compiler/lib/Conversion/TensorRTToTensorRTRuntime/CMakeLists.txt
index 050c6de8c..016d30a66 100644
--- a/mlir-tensorrt/compiler/lib/Conversion/TensorRTToTensorRTRuntime/CMakeLists.txt
+++ b/mlir-tensorrt/compiler/lib/Conversion/TensorRTToTensorRTRuntime/CMakeLists.txt
@@ -1,7 +1,7 @@
 add_mlir_tensorrt_library(MLIRTensorRTTensorRTToTensorRTRuntime
   TensorRTToTensorRTRuntime.cpp
 
-  LINK_LIBS
+  LINK_LIBS PUBLIC
   MLIRTensorRTExecutorUtils
   MLIRTensorRTDialect
   MLIRTensorRTTensorRTRuntimeDialect
diff --git a/mlir-tensorrt/test/Dialect/Plan/segmentation-pipeline.mlir b/mlir-tensorrt/test/Dialect/Plan/segmentation-pipeline.mlir
index 577de6208..66e4e79b7 100644
--- a/mlir-tensorrt/test/Dialect/Plan/segmentation-pipeline.mlir
+++ b/mlir-tensorrt/test/Dialect/Plan/segmentation-pipeline.mlir
@@ -224,4 +224,24 @@ builtin.module @simple_gather_dynamic attributes {
 //   CHECK-DAG:       %[[v1:.+]] = stablehlo.reshape %[[v0]] : (tensor<i32>) -> tensor<1xi32>
 //   CHECK-DAG:       %[[v2:.+]] = stablehlo.concatenate %[[c]], %[[v1]], %[[c_0]], %[[c_0]]
 //   CHECK-DAG:       %[[v3:.+]] = "stablehlo.dynamic_gather"(%[[arg1]], %[[arg0]], %[[v2]])
-//   CHECK-DAG:       return %[[v3]] : tensor<?x?x256x256xi32>
\ No newline at end of file
+//   CHECK-DAG:       return %[[v3]] : tensor<?x?x256x256xi32>
+
+// -----
+
+builtin.module attributes {
+  plan.cluster_kinds = [
+    #plan.tensorrt_cluster<benefit = 1, disallow_shape_tensor_calculations=true, tensorrt_major_version=10>,
+    #plan.host_cluster<benefit = 0>
+  ]
+} {
+  func.func @static_type_refinement() -> tensor<?x?xi32>{
+    %c_0 = stablehlo.constant dense<1> : tensor<1x1xi32>
+    %c_1 = stablehlo.constant dense<2> : tensor<1x1xi32>
+    %0 = stablehlo.subtract %c_0, %c_1 : (tensor<1x1xi32>, tensor<1x1xi32>) -> tensor<?x?xi32>
+    return %0 : tensor<?x?xi32>
+  }
+}
+
+// CHECK-LABEL: func.func @static_type_refinement() -> tensor<1x1xi32>
+// CHECK-LABEL: tensorrt.module
+// CHECK: stablehlo.subtract {{.*}} : tensor<1x1xi32>
diff --git a/mlir-tensorrt/test/python/mlir_tensorrt_compiler/compiler_api/test_options_context.py b/mlir-tensorrt/test/python/mlir_tensorrt_compiler/compiler_api/test_options_context.py
index d74198311..95b9c591a 100644
--- a/mlir-tensorrt/test/python/mlir_tensorrt_compiler/compiler_api/test_options_context.py
+++ b/mlir-tensorrt/test/python/mlir_tensorrt_compiler/compiler_api/test_options_context.py
@@ -27,4 +27,4 @@
 
 
 # CHECK: InvalidArgument: InvalidArgument: non-existent-options-type is not a valid option type. Valid options were: stable-hlo-to-executable
-# CHECK: --tensorrt-timing-cache-path= --device-infer-from-host=true --debug-only= --executor-index-bitwidth=64 --entrypoint=main --plan-clustering-disallow-host-tensors-in-tensorrt-clusters=false --tensorrt-workspace-memory-pool-limit=1073741824 --device-max-registers-per-block=65536 --tensorrt-strongly-typed=false --tensorrt-layer-info-dir= --device-compute-capability=86 --debug=false --mlir-print-ir-tree-dir= --disable-tensorrt-extension=false --tensorrt-builder-opt-level=3 --tensorrt-engines-dir=
+# CHECK: Options[{{.*}}]