Enhance testing for pending request count (#6532) (#6545)
* Enhance testing for pending request count

* Improve the documentation

* Add more documentation
tanmayv25 authored Nov 9, 2023
1 parent 6b39818 commit d42ff6e
Showing 2 changed files with 51 additions and 9 deletions.
54 changes: 46 additions & 8 deletions qa/L0_metrics/metrics_queue_size_test.py
@@ -83,14 +83,24 @@ def setUp(self):
url=self.server_url, concurrency=self.concurrency
)

def _validate_model_config(self, model_name):
# Test-specific configurations
self.max_queue_size = 0

def _validate_model_config(self, model_name, max_queue_size=0):
config = self.client.get_model_config(model_name)
print(config)
params = config.get("parameters", {})
delay_ms = int(params.get("execute_delay_ms", {}).get("string_value"))
max_batch_size = config.get("max_batch_size")
self.assertEqual(delay_ms, self.delay_ms)
self.assertEqual(max_batch_size, self.max_batch_size)

dynamic_batching = config.get("dynamic_batching", {})
default_queue_policy = dynamic_batching.get("default_queue_policy", {})
self.max_queue_size = default_queue_policy.get("max_queue_size", 0)

self.assertEqual(self.max_queue_size, max_queue_size)

return config

def _get_metrics(self):
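
As a side note on the parsing above: here is a minimal sketch of the dict shape that _validate_model_config assumes get_model_config returns. The field names follow Triton's ModelConfig schema; the values are hypothetical, not taken from the commit.

# Hypothetical response shape for client.get_model_config(model_name);
# illustrative values only.
config = {
    "max_batch_size": 4,
    "parameters": {"execute_delay_ms": {"string_value": "2000"}},
    "dynamic_batching": {
        "default_queue_policy": {"max_queue_size": 4},
    },
}

# Same lookup chain as the test: missing keys fall back to 0,
# i.e. an unbounded queue.
max_queue_size = (
    config.get("dynamic_batching", {})
    .get("default_queue_policy", {})
    .get("max_queue_size", 0)
)
assert max_queue_size == 4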
@@ -148,8 +158,10 @@ def _send_async_requests_sequence(self, num_seq_slots, model_name, inputs, futur
)
num_sent += 1

def _test_helper(self, model_name, batch_size, send_requests_func):
self._validate_model_config(model_name)
def _test_helper(
self, model_name, batch_size, send_requests_func, max_queue_size=0
):
self._validate_model_config(model_name, max_queue_size=max_queue_size)

queue_size = QUEUE_METRIC_TEMPLATE.format(model_name=model_name)
infer_count = INFER_METRIC_TEMPLATE.format(model_name=model_name)
@@ -162,9 +174,16 @@ def _test_helper(self, model_name, batch_size, send_requests_func):
# Give Triton a second to load all requests into queues
time.sleep(1)

starting_queue_size = self.num_requests - batch_size
# Start from (num_requests - batch_size) because one batch should be executing,
# and the rest of the requests should be queued.
# If max_queue_size is specified, then the number of queued requests is
# capped at max_queue_size.
if max_queue_size != 0:
self._assert_metric_equals(queue_size, max_queue_size)
starting_queue_size = max_queue_size
else:
starting_queue_size = self.num_requests - batch_size

for expected_queue_size in range(starting_queue_size, 0, -1 * batch_size):
self._assert_metric_equals(queue_size, expected_queue_size)
time.sleep(self.delay_sec)
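
To make the drain loop above concrete, here is a small worked example with hypothetical numbers (the real num_requests, batch_size, and delay come from setUp, which is outside this hunk):

# Hypothetical values for illustration only.
num_requests, batch_size, max_queue_size = 10, 2, 4

# One batch executes immediately; with a queue policy in place, the queue
# holds at most max_queue_size requests and the rest are rejected.
if max_queue_size != 0:
    starting_queue_size = max_queue_size
else:
    starting_queue_size = num_requests - batch_size

# The queue should drain by one batch per delay period: 4 -> 2 -> 0.
print(list(range(starting_queue_size, 0, -batch_size)))  # [4, 2]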
@@ -174,13 +193,21 @@ def _test_helper(self, model_name, batch_size, send_requests_func):
time.sleep(self.delay_sec)

# All accepted requests should've been executed by this point
self._assert_metric_equals(infer_count, self.num_requests)
expected_exec_count = math.ceil(self.num_requests / batch_size)
expected_infer_count = starting_queue_size + batch_size
self._assert_metric_equals(infer_count, expected_infer_count)
expected_exec_count = math.ceil(expected_infer_count / batch_size)
self._assert_metric_equals(exec_count, expected_exec_count)

# Verify that only requests rejected at enqueue time (if any) raised exceptions
failed_count = 0
for future in futures:
future.get_result()
try:
future.get_result()
except Exception:
failed_count += 1

self.assertEqual(
failed_count, self.num_requests - batch_size - starting_queue_size
)

def test_default_scheduler(self):
model_name = "default"
@@ -194,6 +221,17 @@ def test_dynamic_batch_scheduler(self):
batch_size = self.max_batch_size
self._test_helper(model_name, batch_size, self._send_async_requests)

def test_fail_max_queue_size(self):
model_name = "max_queue_size"
# This test checks whether the metrics properly account for requests
# that fail to enqueue on the server. The test sets max_queue_size, so
# any requests beyond the specified queue size should fail immediately
# instead of waiting for execution.
batch_size = self.max_batch_size
self._test_helper(
model_name, batch_size, self._send_async_requests, max_queue_size=4
)

def test_sequence_batch_scheduler_direct(self):
model_name = "sequence_direct"
# With sufficient queue delay and minimum_slot_utilization set, we
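For context on the failure counting in _test_helper: a request rejected because the queue is full surfaces on the client only when get_result() is called on its future. A minimal sketch, assuming the tritonclient HTTP API used by this test; the exact error message depends on the server version.

from tritonclient.utils import InferenceServerException

def count_failures(futures):
    # Futures whose requests were rejected at enqueue time raise
    # InferenceServerException from get_result().
    failed = 0
    for future in futures:
        try:
            future.get_result()
        except InferenceServerException:
            failed += 1
    return failed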
6 changes: 5 additions & 1 deletion qa/L0_metrics/test.sh
@@ -325,6 +325,10 @@ DYNAMIC_MODEL="${MODELDIR}/dynamic"
cp -r "${DEFAULT_MODEL}" "${DYNAMIC_MODEL}"
echo -e "\ndynamic_batching { max_queue_delay_microseconds: ${MAX_QUEUE_DELAY_US} }\n" >> "${DYNAMIC_MODEL}/config.pbtxt"

MAX_QUEUE_SIZE_MODEL="${MODELDIR}/max_queue_size"
cp -r "${DEFAULT_MODEL}" "${MAX_QUEUE_SIZE_MODEL}"
echo -e "\ndynamic_batching { max_queue_delay_microseconds: ${MAX_QUEUE_DELAY_US} default_queue_policy { max_queue_size: 4 } }\n" >> "${MAX_QUEUE_SIZE_MODEL}/config.pbtxt"

SEQUENCE_DIRECT_MODEL="${MODELDIR}/sequence_direct"
cp -r "${DEFAULT_MODEL}" "${SEQUENCE_DIRECT_MODEL}"
echo -e "\nsequence_batching { direct { max_queue_delay_microseconds: ${MAX_QUEUE_DELAY_US}, minimum_slot_utilization: 1.0 } }\n" >> "${SEQUENCE_DIRECT_MODEL}/config.pbtxt"
@@ -347,7 +351,7 @@ run_and_check_server
python3 ${PYTHON_TEST} 2>&1 | tee ${CLIENT_LOG}
kill $SERVER_PID
wait $SERVER_PID
expected_tests=5
expected_tests=6
check_unit_test "${expected_tests}"

if [ $RET -eq 0 ]; then