Skip to content

Commit

Permalink
Use awscli python instead of calling subprocess. Fixes issue with PAT…
Browse files Browse the repository at this point in the history
…H in Jupyter. Also add docstring
  • Loading branch information
israelmcmc committed Feb 20, 2024
1 parent 1d4f348 commit 7354b21
Showing 1 changed file with 34 additions and 11 deletions.
45 changes: 34 additions & 11 deletions cosipy/util/data_fetching.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,46 @@
import subprocess, os
import os
from awscli.clidriver import create_clidriver

def fetch_wasabi_file(file,
output = None,
override = False,
bucket = 'cosi-pipeline-public',
endpoint = 'https://s3.us-west-1.wasabisys.com',
access_key_id = 'GBAL6XATQZNRV3GFH9Y4',
access_key = 'GToOczY5hGX3sketNO2fUwiq4DJoewzIgvTCHoOv'):
access_key = 'GBAL6XATQZNRV3GFH9Y4',
secret_key = 'GToOczY5hGX3sketNO2fUwiq4DJoewzIgvTCHoOv'):
"""
Download a file from COSI's Wasabi account.
Parameters
----------
file: str
Full path to file in Wasabi
output: str
Full path to the downloaded file in the local system. By default it will use
the current directory and the same file name as the input file.
bucket: str
Passed to aws --bucket option
endpoint: str
Passed to aws --endpoint-url option
access_key: str
AWS_ACCESS_KEY_ID
secret_key: str
AWS_SECRET_ACCESS_KEY
"""

if output is None:
output = file.split('/')[-1]

if os.path.exists(output) and not override:
raise RuntimeError(f"File {output} already exists.")

subprocess.run(['aws', 's3api', 'get-object',
'--bucket', bucket,
'--key', file,
'--endpoint-url', endpoint,
output],
env = os.environ.copy() | {'AWS_ACCESS_KEY_ID':access_key_id,
'AWS_SECRET_ACCESS_KEY':access_key})

cli = create_clidriver()

cli.session.set_credentials(access_key, secret_key)

cli.main(['s3api', 'get-object',
'--bucket', bucket,
'--key', file,
'--endpoint-url', endpoint,
output])

0 comments on commit 7354b21

Please sign in to comment.