Skip to content

Commit

Permalink
Merge pull request #118 from dmargala/shared-run-add-initial-wait
Browse files Browse the repository at this point in the history
Add initial delay and scale wait parameters with ntasks
  • Loading branch information
scanon authored Dec 5, 2024
2 parents 6105915 + d445a26 commit 517d5a4
Showing 1 changed file with 16 additions and 3 deletions.
19 changes: 16 additions & 3 deletions podman_hpc/podman_hpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import sys
import os
import math
import socket
import re
import time
Expand All @@ -14,6 +15,14 @@
from subprocess import Popen, PIPE


def _round_nearest(x, a):
    """
    Round x to the nearest integer multiple of the step a.

    Returns round(x / a) * a. A step of zero returns a itself (zero),
    since zero is the only multiple of zero.
    """
    if a == 0:
        # A zero step would otherwise raise ZeroDivisionError below; this
        # is reachable when a configured interval/timeout is set to 0.
        return a
    # Built-in round() resolves exact halfway cases to the even multiple.
    return round(x / a) * a


def _param_scale_log2(x, p):
    """
    Scale the parameter p logarithmically with the count x.

    Computes p * (1 + log2(x)) rounded to the nearest multiple of p, so
    the result is p for x == 1 and gains one extra p for every doubling
    of x. Counts below 1 are treated as 1, so the result is never
    smaller than the unscaled p.
    """
    # math.log2 raises ValueError for x <= 0 and is negative for x < 1,
    # which would produce a zero or negative wait; clamp to one task.
    x = max(x, 1)
    return _round_nearest(p * (1 + math.log2(x)), p)


def podman_devnull(cmd, conf):
"""
Run a command and ignore the output.
Expand Down Expand Up @@ -310,9 +319,13 @@ def _shared_run(conf, run_args, **site_opts):
# wait for container to exist
comm = ["container", "exists", container_name]
start_time = time.time()
while podman_devnull(comm, conf) != 0:
time.sleep(conf.wait_poll_interval)
if time.time() - start_time > conf.wait_timeout:
wait_poll_interval = _param_scale_log2(ntasks, conf.wait_poll_interval)
wait_timeout = _param_scale_log2(ntasks, conf.wait_timeout)
while True:
time.sleep(wait_poll_interval)
if podman_devnull(comm, conf) == 0:
break
if time.time() - start_time > wait_timeout:
msg = "Timeout waiting for shared-run start"
raise OSError(msg)
if run_thread and run_thread.exitcode:
Expand Down

0 comments on commit 517d5a4

Please sign in to comment.