Skip to content

Commit

Permalink
update pipeline
Browse files Browse the repository at this point in the history
Signed-off-by: Bhumika Rao <[email protected]>
  • Loading branch information
bhumirao committed Apr 2, 2023
1 parent ba1c500 commit 5b2c02e
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 107 deletions.
7 changes: 6 additions & 1 deletion Data/Soil/.gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
/GlobalSoilOrganicCarbonDensityinkgCarbon_m2to1meterdepth.tif

/data
/metadata
/Global Soil Organic Carbon Density in kg Carbon_m2 to 1 meter depth.zip
/CONTENTS.txt
/TERMS OF USE.txt
/soilcarbon.ovr
/processed
/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif
/.botmKQAeq3934dqKuUWyps.tmp
/.kfLrvZdTQqnxpJRSCrHh96.tmp

This file was deleted.

26 changes: 0 additions & 26 deletions Data/Soil/process.py

This file was deleted.

4 changes: 4 additions & 0 deletions Data/Soil/soilcarbon.ovr.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
outs:
- md5: 33956e4dd24c1caa1dcea956e85e1f5f
size: 131473
path: soilcarbon.ovr
23 changes: 23 additions & 0 deletions dvc.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
schema: '2.0'
stages:
clean:
cmd: rm -rf tmp_unzip_path
load:
cmd:
- dvc push
extract:
cmd:
- python scripts/extract.py
- python scripts/rename_files.py
transform:
cmd:
- "python scripts/transform.py --input tmp_unzip_path/data/commonData_Data0_soilcarbon.ovr\
\ \\\n --output Data/Soil/processed/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif"
deps:
- path: tmp_unzip_path/data/commonData_Data0_soilcarbon.ovr
md5: 33956e4dd24c1caa1dcea956e85e1f5f
size: 131473
outs:
- path: Data/Soil/processed/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif
md5: 96f78155b79a835f56d019586d4c1f14
size: 1038282
21 changes: 21 additions & 0 deletions dvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# DVC pipeline definition: one entry per stage under `stages`.
# NOTE(review): `extract` declares no `outs`, while `transform` depends on a
# file under tmp_unzip_path/ — confirm DVC orders `extract` before `transform`
# as intended, since no stage output links the two.
stages:
# Runs the download/unzip script, then the script that renames extracted files.
extract:
cmd:
- python scripts/extract.py
- python scripts/rename_files.py
# Runs scripts/transform.py on the extracted .ovr file; the resulting .tif is
# declared as this stage's output.
transform:
cmd:
- >-
python scripts/transform.py --input tmp_unzip_path/data/commonData_Data0_soilcarbon.ovr \
--output Data/Soil/processed/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif
deps:
- tmp_unzip_path/data/commonData_Data0_soilcarbon.ovr
outs:
- Data/Soil/processed/GlobalSoilOrganicCarbonDensityinkgCm_1mDepth.tif
# Runs `dvc push` (presumably to upload tracked outputs to the configured
# remote — verify the remote configuration).
load:
cmd:
- dvc push
# Removes the temporary extraction directory.
clean:
cmd: rm -rf tmp_unzip_path


32 changes: 32 additions & 0 deletions scripts/extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import argparse
import requests
import zipfile
import io
import os

# Download link for the zip archive that this script fetches and unpacks.
# NOTE(review): the query string carries `Expires=1680448953`, a Unix timestamp
# that falls on 2 April 2023 (UTC); after that moment the request is expected
# to fail and the link has to be replaced by hand.
# NOTE(review): the link also embeds `AWSAccessKeyId` and `Signature` values —
# confirm that committing them to the repository is acceptable.
URL = "https://databasin2-filestore.s3.amazonaws.com/a4cb6d367eae4e52a08902874f8bfedf/download/a4cb6d367eae4e52a08902874f8bfedf_1_zip_en.zip?Signature=148HP0SJFI49y7HmTmOaNMAlUDw%3D&Expires=1680448953&AWSAccessKeyId=AKIAI4RK5BEPK3FCQPUQ"

def ensure_url_is_accessible(URL, timeout=60):
    """Fetch *URL* and, if the request succeeds, unpack the downloaded archive.

    Args:
        URL: Address of the zip archive to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on an unresponsive
            host, which would hang the whole pipeline stage.

    Returns:
        None. On success the response body is handed to
        ``download_and_unzip_files``; on failure a message is printed and
        nothing is extracted.
    """
    try:
        r = requests.get(URL, timeout=timeout)
    except requests.RequestException as e:
        # DNS failures, refused connections and timeouts never produce a
        # response object, so report them instead of dying with a traceback.
        print("Could not reach download link:", e)
        return
    if not r.ok:
        print("Download link expired. Please update download link")
        return
    download_and_unzip_files(r.content)

def download_and_unzip_files(content):
    """Unpack an in-memory zip archive into ``<cwd>/tmp_unzip_path``.

    Despite its name this function performs no download: *content* is the raw
    bytes of an archive that has already been fetched by the caller.

    Args:
        content: Bytes of a zip archive.

    Returns:
        None. Prints "unzipped successfully" when extraction completes, or the
        error raised by ``zipfile`` / the filesystem when it does not.
    """
    target_parent_dir = os.path.join(os.getcwd(), r'tmp_unzip_path')
    # exist_ok removes the check-then-create race and tolerates re-runs where
    # the directory is already present.
    os.makedirs(target_parent_dir, exist_ok=True)
    try:
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(io.BytesIO(content)) as z:
            z.extractall(target_parent_dir)
    except (
        zipfile.BadZipFile,
        zipfile.LargeZipFile,
        OSError,
        RuntimeError,
        NotImplementedError,
    ) as e:
        # Best effort by design: report corrupt/unsupported archives and
        # filesystem errors without aborting the caller.
        print(e)
    else:
        print("unzipped successfully")

# Script entry point. There is no `__main__` guard, so this runs whenever the
# module is executed or imported: it requests URL and, when the response is
# OK, unpacks the archive via download_and_unzip_files().
ensure_url_is_accessible(URL)



12 changes: 12 additions & 0 deletions scripts/rename_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import os

# Replace every backslash in the names of entries directly inside
# <cwd>/tmp_unzip_path/data with an underscore. (Presumably the archive stores
# Windows-style paths that end up as literal backslashes in file names —
# verify against the extracted data.) Nothing happens if the directory is
# missing.
data_dir = os.path.join(os.getcwd(), r'tmp_unzip_path/data')
entries = os.listdir(data_dir) if os.path.exists(data_dir) else []
for entry in entries:
    if '\\' not in entry:
        continue  # name is already free of backslashes
    sanitized = os.fsdecode(entry).replace("\\", "_")
    os.rename(
        os.path.join(data_dir, entry),
        os.path.join(data_dir, sanitized),
    )
22 changes: 22 additions & 0 deletions scripts/transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import subprocess
import argparse
import os

# Command-line interface. Both options are required: the gdalwarp command
# assembled later in this script concatenates them into a string, so a missing
# value (None) would otherwise surface as an opaque TypeError instead of a
# usage message.
parser = argparse.ArgumentParser(
    description="Warp a raster file with gdalwarp and write the result as GeoTIFF."
)
parser.add_argument('--input', required=True, help="Path of the file to transform")
parser.add_argument('--output', required=True, help="Path of the transformed file to write")
# Stored as a plain dict so values are read with args.get('input') / args.get('output').
args = vars(parser.parse_args())


def run_shell_cmd(cmd):
    """Run an external command (without a shell) and return its text output.

    Args:
        cmd: Either a command string, which is split on whitespace (no shell
            quoting is applied, so an argument containing spaces cannot be
            expressed this way), or a list/tuple of arguments passed verbatim.

    Returns:
        The decoded stdout if the command wrote any; otherwise the decoded
        stderr if it wrote any; otherwise None. None is also returned when
        the command could not be started (the error is printed).
    """
    import sys  # local import: only needed to forward the child's stderr

    argv = cmd.split() if isinstance(cmd, str) else list(cmd)
    try:
        # stderr must be piped explicitly; otherwise it is never captured and
        # the "fall back to stderr" branch below can only ever yield None.
        completed = subprocess.run(
            argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except (OSError, ValueError, IndexError) as e:
        # Missing/non-executable program, invalid arguments or empty command.
        print(e)
        return None

    stderr_text = completed.stderr.decode('utf-8', 'replace')
    if stderr_text:
        # Keep diagnostics visible on the console even though they are now
        # captured; callers that ignore the return value still see them.
        sys.stderr.write(stderr_text)
    if completed.stdout:
        return completed.stdout.decode('utf-8', 'replace')
    return stderr_text or None

# Fixed gdalwarp options: source and target SRS are both EPSG:4326, the output
# is a GeoTIFF on a 0.5 x 0.5 grid covering -180..180 / -90..90, resampled
# with the "near" method. The input and output paths from the command line
# are appended after these options.
gdalwarp_options = "gdalwarp -s_srs EPSG:4326 -t_srs EPSG:4326 -to SRC_METHOD=NO_GEOTRANSFORM -tr 0.5 0.5 -r near -te -180.0 -90.0 180.0 90.0 -te_srs EPSG:4326 -of GTiff "
source_path = args.get('input')
destination_path = args.get('output')
run_shell_cmd(gdalwarp_options + source_path + " " + destination_path)

0 comments on commit 5b2c02e

Please sign in to comment.