Merge pull request siliconcompiler#2376 from siliconcompiler/add-call-to-get-final

add call in client to receive final run file to pass along logs and r…
gadfort authored Apr 25, 2024
2 parents 9653dca + 7e0b657 commit d911a97
Showing 2 changed files with 29 additions and 15 deletions.
33 changes: 19 additions & 14 deletions siliconcompiler/remote/client.py
@@ -396,6 +396,16 @@ def __remote_run_loop(chip, check_interval):
             all_nodes.append(f'{step}{index}')
     completed = []
     result_procs = []
+
+    def schedule_download(node):
+        node_proc = multiprocessor.Process(target=fetch_results,
+                                           args=(chip, node))
+        node_proc.start()
+        result_procs.append(node_proc)
+        if node is None:
+            node = 'final result'
+        chip.logger.info(f' {node}')
+
     while is_busy:
         time.sleep(check_interval)
         new_completed, is_busy = check_progress(chip)
@@ -407,20 +417,15 @@ def __remote_run_loop(chip, check_interval):
         if nodes_to_fetch:
             chip.logger.info(' Fetching completed results:')
             for node in nodes_to_fetch:
-                node_proc = multiprocessor.Process(target=fetch_results,
-                                                   args=(chip, node))
-                node_proc.start()
-                result_procs.append(node_proc)
-                chip.logger.info(f' {node}')
+                schedule_download(node)
 
     # Done: try to fetch any node results which still haven't been retrieved.
     chip.logger.info('Remote job completed! Retrieving final results...')
     for node in all_nodes:
         if node not in completed:
-            node_proc = multiprocessor.Process(target=fetch_results,
-                                               args=(chip, node))
-            node_proc.start()
-            result_procs.append(node_proc)
+            schedule_download(node)
+    schedule_download(None)
 
     # Make sure all results are fetched before letting the client issue
     # a deletion request.
     for proc in result_procs:
@@ -659,7 +664,10 @@ def success_action(resp):
     def error_action(code, msg):
         # Results are fetched in parallel, and a failure in one node
         # does not necessarily mean that the whole job failed.
-        chip.logger.warning(f'Could not fetch results for node: {node}')
+        if node:
+            chip.logger.warning(f'Could not fetch results for node: {node}')
+        else:
+            chip.logger.warning('Could not fetch results for final results.')
         return 404
 
     return __post(chip,
@@ -690,12 +698,9 @@ def fetch_results(chip, node):

     # Note: the server should eventually delete the results as they age out (~8h), but this will
     # give us a brief period to look at failed results.
-    if not node and results_code:
+    if results_code:
         chip.error("Sorry, something went wrong and your job results could not be retrieved. "
                    f"(Response code: {results_code})", fatal=True)
-    if node and results_code:
-        # nothing was received no need to unzip
-        return
 
     # Unzip the results.
     # Unauthenticated jobs get a gzip archive, authenticated jobs get nested archives.
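Not part of the commit, but for context: the client change above routes every per-node download through a single schedule_download helper and then calls schedule_download(None) once, so the job-level "final result" archive (carrying logs and run data) is fetched through the same parallel path as the per-node archives. A minimal standalone sketch of that pattern, with a hypothetical fetch_results stub and made-up node names standing in for the real SiliconCompiler calls:

import multiprocessing

def fetch_results(node):
    # Stand-in for the real download; just report what would be fetched.
    label = node if node is not None else 'final result'
    print(f'fetching {label}')

def download_all(nodes):
    procs = []

    def schedule_download(node):
        proc = multiprocessing.Process(target=fetch_results, args=(node,))
        proc.start()
        procs.append(proc)

    for node in nodes:            # per-node results as they complete
        schedule_download(node)
    schedule_download(None)       # job-level final-results archive

    # Join every fetch before any cleanup/deletion request is issued.
    for proc in procs:
        proc.join()

if __name__ == '__main__':
    download_all(['import0', 'syn0'])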
11 changes: 10 additions & 1 deletion siliconcompiler/remote/server.py
@@ -12,6 +12,7 @@
 import fastjsonschema
 from pathlib import Path
 from fastjsonschema import JsonSchemaException
+import io
 
 from siliconcompiler import Chip, Schema
 from siliconcompiler._metadata import version as sc_version
@@ -252,7 +253,7 @@ async def handle_get_results(self, request):
             return response
 
         job_hash = job_params['job_hash']
-        node = job_params['node'] if 'node' in job_params else ''
+        node = job_params['node'] if 'node' in job_params else None
 
         resp = web.StreamResponse(
             status=200,
@@ -265,6 +266,14 @@ async def handle_get_results(self, request):
         await resp.prepare(request)
 
         zipfn = os.path.join(self.nfs_mount, job_hash, f'{job_hash}_{node}.tar.gz')
+        if not node:
+            with tarfile.open(zipfn, 'w:gz') as tar:
+                text = "Done"
+                metadata_file = io.BytesIO(text.encode('ascii'))
+                tarinfo = tarfile.TarInfo(f'{job_hash}/done')
+                tarinfo.size = metadata_file.getbuffer().nbytes
+                tar.addfile(tarinfo=tarinfo, fileobj=metadata_file)
 
         with open(zipfn, 'rb') as zipf:
             await resp.write(zipf.read())

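Also not part of the commit: the server-side addition relies on a standard-library pattern for packing an in-memory payload into a gzip-compressed tar, which is what produces the small "done" marker archive returned for the final-results request. A self-contained sketch under assumed file and member names:

import io
import tarfile

def write_marker_archive(path, member_name, text='Done'):
    # Create a .tar.gz whose single member is built in memory (no temp file).
    payload = io.BytesIO(text.encode('ascii'))
    info = tarfile.TarInfo(member_name)
    info.size = payload.getbuffer().nbytes
    with tarfile.open(path, 'w:gz') as tar:
        tar.addfile(tarinfo=info, fileobj=payload)

if __name__ == '__main__':
    write_marker_archive('example_job.tar.gz', 'example_job/done')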
