From 7354b2193edd53a8cac2e6ad88efe6955677bb7c Mon Sep 17 00:00:00 2001 From: Israel Martinez Date: Tue, 20 Feb 2024 15:13:40 -0500 Subject: [PATCH] Use awscli python instead of calling subprocess. Fixes issue with PATH in Jupyter. Also add docstring --- cosipy/util/data_fetching.py | 45 +++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/cosipy/util/data_fetching.py b/cosipy/util/data_fetching.py index f4324ffe..1d8dce02 100644 --- a/cosipy/util/data_fetching.py +++ b/cosipy/util/data_fetching.py @@ -1,23 +1,46 @@ -import subprocess, os +import os +from awscli.clidriver import create_clidriver def fetch_wasabi_file(file, output = None, override = False, bucket = 'cosi-pipeline-public', endpoint = 'https://s3.us-west-1.wasabisys.com', - access_key_id = 'GBAL6XATQZNRV3GFH9Y4', - access_key = 'GToOczY5hGX3sketNO2fUwiq4DJoewzIgvTCHoOv'): + access_key = 'GBAL6XATQZNRV3GFH9Y4', + secret_key = 'GToOczY5hGX3sketNO2fUwiq4DJoewzIgvTCHoOv'): + """ + Download a file from COSI's Wasabi account. + Parameters + ---------- + file: str + Full path to file in Wasabi + output: str + Full path to the downloaded file in the local system. By default it will use + the current directory and the same file name as the input file. 
+ bucket: str + Passed to aws --bucket option + endpoint: str + Passed to aws --endpoint-url option + access_key: str + AWS_ACCESS_KEY_ID + secret_key: str + AWS_SECRET_ACCESS_KEY + """ + if output is None: output = file.split('/')[-1] if os.path.exists(output) and not override: raise RuntimeError(f"File {output} already exists.") - - subprocess.run(['aws', 's3api', 'get-object', - '--bucket', bucket, - '--key', file, - '--endpoint-url', endpoint, - output], - env = os.environ.copy() | {'AWS_ACCESS_KEY_ID':access_key_id, - 'AWS_SECRET_ACCESS_KEY':access_key}) + + cli = create_clidriver() + + cli.session.set_credentials(access_key, secret_key) + + cli.main(['s3api', 'get-object', + '--bucket', bucket, + '--key', file, + '--endpoint-url', endpoint, + output]) +