From 05f9f001c1e7d41678c43b8e1ba5a532c215845a Mon Sep 17 00:00:00 2001 From: Misha Chornyi <99709299+mc-nv@users.noreply.github.com> Date: Tue, 14 Jan 2025 13:55:20 -0800 Subject: [PATCH 1/5] Install python build package inside Winbase build container (#7934) --- build.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.py b/build.py index 1cd03f5e99..15bff402a4 100755 --- a/build.py +++ b/build.py @@ -1048,6 +1048,8 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap): # Install the windows- or linux-specific buildbase dependencies if target_platform() == "windows": df += """ +RUN python3 -m pip install build + SHELL ["cmd", "/S", "/C"] """ else: From 71ee05c3a94e2d68838d5ff1aa0ebc5956a2eb10 Mon Sep 17 00:00:00 2001 From: Jacky <18255193+kthui@users.noreply.github.com> Date: Tue, 14 Jan 2025 14:15:37 -0800 Subject: [PATCH 2/5] ci: Fix L0_lifecycle server shutdown (#7933) --- qa/L0_lifecycle/test.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/qa/L0_lifecycle/test.sh b/qa/L0_lifecycle/test.sh index 4efd244c76..f1a33886ac 100755 --- a/qa/L0_lifecycle/test.sh +++ b/qa/L0_lifecycle/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -1576,7 +1576,7 @@ if [ `grep -c "Model 'custom_zero_1_float32' (version 1) has 1 in-flight inferen RET=1 fi -kill $SERVER_PID +kill $SERVER_PID || true wait $SERVER_PID rm -f $CLIENT_LOG @@ -1614,7 +1614,7 @@ if [ `grep -c "Model 'custom_sequence_int32' (version 1) has 1 in-flight inferen RET=1 fi -kill $SERVER_PID +kill $SERVER_PID || true wait $SERVER_PID rm -f $CLIENT_LOG @@ -1655,7 +1655,7 @@ if [ `grep -c "Model 'ensemble_zero_1_float32' (version 1) has 1 in-flight infer RET=1 fi -kill $SERVER_PID +kill $SERVER_PID || true wait $SERVER_PID LOG_IDX=$((LOG_IDX+1)) @@ -2128,7 +2128,7 @@ if [ $? -ne 0 ]; then fi set -e -kill $SERVER_PID +kill $SERVER_PID || true wait $SERVER_PID LOG_IDX=$((LOG_IDX+1)) From 7fef0f110c8cbcebc838e62b5fdde80a2e18463c Mon Sep 17 00:00:00 2001 From: Yingge He <157551214+yinggeh@users.noreply.github.com> Date: Wed, 15 Jan 2025 15:31:05 -0800 Subject: [PATCH 3/5] fix: Correct triton_container_version (#7942) --- build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.py b/build.py index 15bff402a4..ce069ab8a2 100755 --- a/build.py +++ b/build.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -72,7 +72,7 @@ DEFAULT_TRITON_VERSION_MAP = { "release_version": "2.54.0dev", - "triton_container_version": "24.01dev", + "triton_container_version": "25.01dev", "upstream_container_version": "24.12", "ort_version": "1.20.1", "ort_openvino_version": "2024.4.0", From 0131d380c56ca6c22bcbcdb65a647bd05ca056b2 Mon Sep 17 00:00:00 2001 From: Indrajit Bhosale Date: Wed, 15 Jan 2025 16:04:47 -0800 Subject: [PATCH 4/5] fix: Fix L0_implicit_state and its variants (#7941) Server waits for max_sequence_idle_microseconds before closing the request sequence and expects the client to start a NEW sequence for ANY subsequent request. Test was failing because the max_sequence_idle_microseconds was set too low, causing the server to believe the sequence is over and reject any further requests with SAME sequence_id without the START flag. This caused the test to fail. --- qa/L0_implicit_state/models/growable_memory/config.pbtxt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/qa/L0_implicit_state/models/growable_memory/config.pbtxt b/qa/L0_implicit_state/models/growable_memory/config.pbtxt index 0a7920bdf1..7edd4da25c 100644 --- a/qa/L0_implicit_state/models/growable_memory/config.pbtxt +++ b/qa/L0_implicit_state/models/growable_memory/config.pbtxt @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -28,6 +28,8 @@ name: "growable_memory" backend: "implicit_state" max_batch_size: 0 sequence_batching { + # Set large idle timeout to avoid inter-request timeouts for test consistency + max_sequence_idle_microseconds: 10000000 control_input [ { name: "START" From f7fe649aa26de059dc55c9b02dfe515e0fd45a1f Mon Sep 17 00:00:00 2001 From: Ryan McCormick Date: Thu, 16 Jan 2025 09:42:54 -0800 Subject: [PATCH 5/5] test: Stabilize L0_perf_analyzer_capi test consistency (#7946) --- qa/L0_perf_analyzer_capi/test.sh | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/qa/L0_perf_analyzer_capi/test.sh b/qa/L0_perf_analyzer_capi/test.sh index 3e3f9e4af6..53196fa762 100755 --- a/qa/L0_perf_analyzer_capi/test.sh +++ b/qa/L0_perf_analyzer_capi/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -218,6 +218,7 @@ if [ $(cat $CLIENT_LOG | grep ": 0 infer/sec\|: 0 usec" | wc -l) -ne 0 ]; then fi $PERF_ANALYZER -v -m simple_savedmodel_sequence_object -p 2000 -t5 --sync \ +-s ${STABILITY_THRESHOLD} \ --input-data=$SEQ_JSONDATAFILE \ --service-kind=triton_c_api --model-repository=$DATADIR \ --triton-server-directory=$SERVER_LIBRARY_PATH >$CLIENT_LOG 2>&1 @@ -234,6 +235,7 @@ fi set +e $PERF_ANALYZER -v -m graphdef_sequence_float32 --shape INPUT:2 \ +-s ${STABILITY_THRESHOLD} \ --input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE \ --input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE -p2000 \ --service-kind=triton_c_api --model-repository=$DATADIR \ @@ -250,21 +252,9 @@ if [ $(cat $CLIENT_LOG | grep -P "The supplied shape .+ is incompatible with th fi set -e -# Negative test for the async mode. -set +e -$PERF_ANALYZER -v -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 -a \ ---service-kind=triton_c_api --model-repository=$DATADIR \ ---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \ ->$CLIENT_LOG 2>&1 -if [ $(cat $CLIENT_LOG | grep "not supported by triton_c_api service" | wc -l) -ne 1 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -set -e - for SHARED_MEMORY_TYPE in system cuda; do $PERF_ANALYZER -v -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 \ + -s ${STABILITY_THRESHOLD} \ --shared-memory=$SHARED_MEMORY_TYPE \ --service-kind=triton_c_api --model-repository=$DATADIR \ --triton-server-directory=$SERVER_LIBRARY_PATH >$CLIENT_LOG 2>&1