From 05f9f001c1e7d41678c43b8e1ba5a532c215845a Mon Sep 17 00:00:00 2001
From: Misha Chornyi <99709299+mc-nv@users.noreply.github.com>
Date: Tue, 14 Jan 2025 13:55:20 -0800
Subject: [PATCH 1/5] Install python build package inside Winbase build
 container (#7934)

---
 build.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/build.py b/build.py
index 1cd03f5e99..15bff402a4 100755
--- a/build.py
+++ b/build.py
@@ -1048,6 +1048,8 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
     # Install the windows- or linux-specific buildbase dependencies
     if target_platform() == "windows":
         df += """
+RUN python3 -m pip install build
+
 SHELL ["cmd", "/S", "/C"]
 """
     else:

From 71ee05c3a94e2d68838d5ff1aa0ebc5956a2eb10 Mon Sep 17 00:00:00 2001
From: Jacky <18255193+kthui@users.noreply.github.com>
Date: Tue, 14 Jan 2025 14:15:37 -0800
Subject: [PATCH 2/5] ci: Fix L0_lifecycle server shutdown (#7933)

---
 qa/L0_lifecycle/test.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/qa/L0_lifecycle/test.sh b/qa/L0_lifecycle/test.sh
index 4efd244c76..f1a33886ac 100755
--- a/qa/L0_lifecycle/test.sh
+++ b/qa/L0_lifecycle/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -1576,7 +1576,7 @@ if [ `grep -c "Model 'custom_zero_1_float32' (version 1) has 1 in-flight inferen
     RET=1
 fi
 
-kill $SERVER_PID
+kill $SERVER_PID || true
 wait $SERVER_PID
 
 rm -f $CLIENT_LOG
@@ -1614,7 +1614,7 @@ if [ `grep -c "Model 'custom_sequence_int32' (version 1) has 1 in-flight inferen
     RET=1
 fi
 
-kill $SERVER_PID
+kill $SERVER_PID || true
 wait $SERVER_PID
 
 rm -f $CLIENT_LOG
@@ -1655,7 +1655,7 @@ if [ `grep -c "Model 'ensemble_zero_1_float32' (version 1) has 1 in-flight infer
     RET=1
 fi
 
-kill $SERVER_PID
+kill $SERVER_PID || true
 wait $SERVER_PID
 
 LOG_IDX=$((LOG_IDX+1))
@@ -2128,7 +2128,7 @@ if [ $? -ne 0 ]; then
 fi
 set -e
 
-kill $SERVER_PID
+kill $SERVER_PID || true
 wait $SERVER_PID
 
 LOG_IDX=$((LOG_IDX+1))

From 7fef0f110c8cbcebc838e62b5fdde80a2e18463c Mon Sep 17 00:00:00 2001
From: Yingge He <157551214+yinggeh@users.noreply.github.com>
Date: Wed, 15 Jan 2025 15:31:05 -0800
Subject: [PATCH 3/5] fix: Correct triton_container_version (#7942)

---
 build.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build.py b/build.py
index 15bff402a4..ce069ab8a2 100755
--- a/build.py
+++ b/build.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -72,7 +72,7 @@
 
 DEFAULT_TRITON_VERSION_MAP = {
     "release_version": "2.54.0dev",
-    "triton_container_version": "24.01dev",
+    "triton_container_version": "25.01dev",
     "upstream_container_version": "24.12",
     "ort_version": "1.20.1",
     "ort_openvino_version": "2024.4.0",

From 0131d380c56ca6c22bcbcdb65a647bd05ca056b2 Mon Sep 17 00:00:00 2001
From: Indrajit Bhosale <iamindrajitb@gmail.com>
Date: Wed, 15 Jan 2025 16:04:47 -0800
Subject: [PATCH 4/5] fix: Fix L0_implicit_state and its variants (#7941)

The server waits for max_sequence_idle_microseconds before closing the request sequence and expects the client to start a NEW sequence for ANY subsequent request.
The test was failing because max_sequence_idle_microseconds was set too low, causing the server to believe the sequence was over and to reject any further requests with the SAME sequence_id that did not carry the START flag.
---
 qa/L0_implicit_state/models/growable_memory/config.pbtxt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/qa/L0_implicit_state/models/growable_memory/config.pbtxt b/qa/L0_implicit_state/models/growable_memory/config.pbtxt
index 0a7920bdf1..7edd4da25c 100644
--- a/qa/L0_implicit_state/models/growable_memory/config.pbtxt
+++ b/qa/L0_implicit_state/models/growable_memory/config.pbtxt
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -28,6 +28,8 @@ name: "growable_memory"
 backend: "implicit_state"
 max_batch_size: 0
 sequence_batching {
+  # Set large idle timeout to avoid inter-request timeouts for test consistency
+  max_sequence_idle_microseconds: 10000000
   control_input [
     {
       name: "START"

From f7fe649aa26de059dc55c9b02dfe515e0fd45a1f Mon Sep 17 00:00:00 2001
From: Ryan McCormick <rmccormick@nvidia.com>
Date: Thu, 16 Jan 2025 09:42:54 -0800
Subject: [PATCH 5/5] test: Stabilize L0_perf_analyzer_capi test consistency
 (#7946)

---
 qa/L0_perf_analyzer_capi/test.sh | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/qa/L0_perf_analyzer_capi/test.sh b/qa/L0_perf_analyzer_capi/test.sh
index 3e3f9e4af6..53196fa762 100755
--- a/qa/L0_perf_analyzer_capi/test.sh
+++ b/qa/L0_perf_analyzer_capi/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -218,6 +218,7 @@ if [ $(cat $CLIENT_LOG | grep ": 0 infer/sec\|: 0 usec" | wc -l) -ne 0 ]; then
 fi
 
 $PERF_ANALYZER -v -m  simple_savedmodel_sequence_object -p 2000 -t5 --sync \
+-s ${STABILITY_THRESHOLD} \
 --input-data=$SEQ_JSONDATAFILE \
 --service-kind=triton_c_api --model-repository=$DATADIR \
 --triton-server-directory=$SERVER_LIBRARY_PATH >$CLIENT_LOG 2>&1
@@ -234,6 +235,7 @@ fi
 
 set +e
 $PERF_ANALYZER -v -m graphdef_sequence_float32 --shape INPUT:2 \
+-s ${STABILITY_THRESHOLD} \
 --input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE \
 --input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE -p2000 \
 --service-kind=triton_c_api --model-repository=$DATADIR \
@@ -250,21 +252,9 @@ if [ $(cat $CLIENT_LOG |  grep -P "The supplied shape .+ is incompatible with th
 fi
 set -e
 
-# Negative test for the async mode.
-set +e
-$PERF_ANALYZER -v -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 -a \
---service-kind=triton_c_api --model-repository=$DATADIR \
---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \
->$CLIENT_LOG 2>&1
-if [ $(cat $CLIENT_LOG | grep "not supported by triton_c_api service" | wc -l) -ne 1 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-set -e
-
 for SHARED_MEMORY_TYPE in system cuda; do
     $PERF_ANALYZER -v -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 \
+    -s ${STABILITY_THRESHOLD} \
     --shared-memory=$SHARED_MEMORY_TYPE \
     --service-kind=triton_c_api --model-repository=$DATADIR \
     --triton-server-directory=$SERVER_LIBRARY_PATH >$CLIENT_LOG 2>&1