# ===== brutus/data.py =====
"""Data registry for brutus model/data files, managed with pooch.

Files are hosted on the Harvard Dataverse. Each file is identified by a
dataset DOI (used to build its download URL) and verified against a
SHA-256 hash after download.
"""

__all__ = ["strato"]

import pathlib

import pooch

# Dataverse DOI for each data file. Keys must stay in sync with
# ``_registry`` below; this is enforced at import time.
_dois = {
    "MIST_1.2_iso_vvcrit0.0.h5": "10.7910/DVN/FZMFQY/BKAG41",
    "MIST_1.2_iso_vvcrit0.4.h5": "10.7910/DVN/FZMFQY/PRGJIP",
    "MIST_1.2_EEPtrk.h5": "10.7910/DVN/JV866N/FJ5NNO",
    "bayestar2019_v1.h5": "10.7910/DVN/G49MEI/Y9UZPG",
    "grid_mist_v9.h5": "10.7910/DVN/7BA4ZG/Z7MGA7",
    "grid_mist_v8.h5": "10.7910/DVN/7BA4ZG/NKVZFT",
    "grid_bayestar_v5.h5": "10.7910/DVN/7BA4ZG/LLZP0B",
    # "offsets_mist_v9.txt": "10.7910/DVN/L7D1FY/XXXXXX",  # DOI not yet published
    "offsets_mist_v8.txt": "10.7910/DVN/L7D1FY/QTNKKN",
    "offsets_bs_v9.txt": "10.7910/DVN/L7D1FY/W4O6NJ",
    "nn_c3k.h5": "10.7910/DVN/MSCY2O/XHU1VJ",
}

# SHA-256 hash for each data file, used by pooch to verify downloads.
_registry = {
    "MIST_1.2_iso_vvcrit0.0.h5": "ac46048acb9c9c1c10f02ac1bd958a8c4dd80498923297907fd64c5f3d82cb57",
    "MIST_1.2_iso_vvcrit0.4.h5": "25d97db9760df5e4e3b65c686a04d5247cae5027c55683e892acb7d1a05c30f7",
    "MIST_1.2_EEPtrk.h5": "001558c1b32f4a85ea9acca3ad3f7332a565167da3f6164a565c3f3f05afc11b",
    "bayestar2019_v1.h5": "73064ab18f4d1d57b356f7bd8cbcc77be836f090f660cca6727da85ed973d1e6",
    "grid_mist_v9.h5": "7d128a5caded78ca9d1788a8e6551b4329aeed9ca74e7a265e531352ecb75288",
    "grid_mist_v8.h5": "b07d9c19e7ff5e475b1b061af6d1bb4ebd13e0e894fd0703160206964f1084e0",
    "grid_bayestar_v5.h5": "c5d195430393ebd6c8865a9352c8b0906b2c43ec56d3645bb9d5b80e6739fd0c",
    # "offsets_mist_v9.txt": None,  # re-enable together with its DOI above
    "offsets_mist_v8.txt": "35425281b5d828431ca5ef93262cb7c6f406814b649d7e7ca4866b8203408e5f",
    "offsets_bs_v9.txt": "b5449c08eb7b894b6d9aa1449a351851ca800ef4ed461c987434a0c250cba386",
    "nn_c3k.h5": "bc86d4bf55b2173b97435d24337579a2f337e80ed050c73f1e31abcd04163259",
}

# Guard against the two mappings drifting apart: every file needs both a
# DOI (to build its URL) and a hash (to verify the download). Without this
# check, a missing entry only surfaces as a confusing error deep in pooch.
if set(_dois) != set(_registry):
    raise RuntimeError(
        "brutus.data: _dois and _registry are out of sync: "
        f"{set(_dois) ^ set(_registry)}"
    )

strato = pooch.create(
    path=pooch.os_cache("astro-brutus"),
    base_url="https://dataverse.harvard.edu/api/access/datafile/",
    registry=_registry,
    # The name of an environment variable that can overwrite the path
    env="ASTRO_BRUTUS_DATA_DIR",
    retry_if_failed=3,
)
# pooch cannot build Dataverse download URLs from the base path + DOI on
# its own, so construct the per-file URLs explicitly.
strato.urls = {
    k: f"{strato.base_url}:persistentId?persistentId=doi:{v}" for k, v in _dois.items()
}


# ===== brutus/utils.py (additions; `strato` arrives via `from .data import strato`) =====
def _fetch(name, symlink_dir):
    """Fetch `name` via pooch and symlink it into `symlink_dir`.

    Parameters
    ----------
    name : str
        Registry key of the file to download (cached by pooch).
    symlink_dir : str or pathlib.Path
        Directory in which a symlink to the cached file is created.

    Returns
    -------
    pathlib.Path
        Path of the symlink inside `symlink_dir`.
    """
    # NOTE(review): progressbar=True requires the optional `tqdm` package
    # (pooch raises ValueError without it) — consider adding tqdm to
    # install_requires, or passing progressbar only when tqdm is importable.
    fpath = pathlib.Path(strato.fetch(name, progressbar=True))

    target_path = pathlib.Path(symlink_dir).resolve() / name
    target_path.parent.mkdir(parents=True, exist_ok=True)
    # Path.exists() follows symlinks, so a stale/broken link reports False
    # and symlink_to() would then raise FileExistsError — remove it first.
    if target_path.is_symlink() and not target_path.exists():
        target_path.unlink()
    if not target_path.exists():
        target_path.symlink_to(fpath)

    return target_path


def fetch_isos(target_dir=".", iso="MIST_1.2_vvcrit0.0"):
    """Download an isochrone grid to the target directory.

    Parameters
    ----------
    target_dir : str, optional
        Directory in which a symlink to the cached file is created.
        Default is the current directory.
    iso : str, optional
        Isochrone grid to download. Options include:
        - 'MIST_1.2_vvcrit0.0' (default), the non-rotating MIST v1.2 isochrones
        - 'MIST_1.2_vvcrit0.4', the rotating MIST v1.2 isochrones

    Returns
    -------
    pathlib.Path
        Path to the (symlinked) isochrone file.

    Raises
    ------
    ValueError
        If `iso` is not a recognized isochrone grid.
    """
    iso_files = {
        "MIST_1.2_vvcrit0.0": "MIST_1.2_iso_vvcrit0.0.h5",
        "MIST_1.2_vvcrit0.4": "MIST_1.2_iso_vvcrit0.4.h5",
    }
    try:
        name = iso_files[iso]
    except KeyError:
        raise ValueError("The specified isochrone file does not exist!")

    return _fetch(name, target_dir)
def fetch_tracks(target_dir=".", track="MIST_1.2_vvcrit0.0"):
    """Download evolutionary tracks to the target directory.

    Parameters
    ----------
    target_dir : str, optional
        Directory in which a symlink to the cached file is created.
        Default is the current directory.
    track : str, optional
        Evolutionary track grid to download. Options include:
        - 'MIST_1.2_vvcrit0.0' (default), the non-rotating MIST v1.2 tracks

    Returns
    -------
    pathlib.Path
        Path to the (symlinked) track file.

    Raises
    ------
    ValueError
        If `track` is not a recognized track grid.
    """
    # (Docstring above fixes the copy-paste "isochrones" wording: this
    # function fetches the EEP *track* file, not an isochrone grid.)
    if track == "MIST_1.2_vvcrit0.0":
        name = "MIST_1.2_EEPtrk.h5"
    else:
        raise ValueError("The specified track file does not exist!")

    return _fetch(name, target_dir)


def fetch_dustmaps(target_dir=".", dustmap="bayestar19"):
    """Download 3-D dust maps to the target directory.

    Parameters
    ----------
    target_dir : str, optional
        Directory in which a symlink to the cached file is created.
        Default is the current directory.
    dustmap : str, optional
        Dust map to download. Options include:
        - 'bayestar19' (default), the "Bayestar" map from Green et al. (2019)

    Returns
    -------
    pathlib.Path
        Path to the (symlinked) dust map file.

    Raises
    ------
    ValueError
        If `dustmap` is not a recognized dust map.
    """
    if dustmap == "bayestar19":
        name = "bayestar2019_v1.h5"
    else:
        raise ValueError("The specified dustmap file does not exist!")

    return _fetch(name, target_dir)
def fetch_grids(target_dir=".", grid="mist_v9"):
    """Download stellar model grids to target directory.

    Parameters
    ----------
    target_dir : str, optional
        Directory in which a symlink to the cached file is created.
        Default is the current directory.
    grid : str, optional
        Model grid to download. Options include:
        - 'mist_v9' (default)
        - 'mist_v8'
        - 'bayestar_v5'

    Returns
    -------
    pathlib.Path
        Path to the (symlinked) grid file.

    Raises
    ------
    ValueError
        If `grid` is not a recognized model grid.
    """
    # Dispatch table replacing the if/elif ladder; same names, same files.
    grid_files = {
        "mist_v9": "grid_mist_v9.h5",
        "mist_v8": "grid_mist_v8.h5",
        "bayestar_v5": "grid_bayestar_v5.h5",
    }
    try:
        name = grid_files[grid]
    except KeyError:
        raise ValueError("The specified grid file does not exist!")

    return _fetch(name, target_dir)
def fetch_offsets(target_dir=".", grid="mist_v9"):
    """Download photometric offset files to the target directory.

    Parameters
    ----------
    target_dir : str, optional
        Directory in which a symlink to the cached file is created.
        Default is the current directory.
    grid : str, optional
        Model grid whose offsets to download. Options include:
        - 'mist_v9' (default)
        - 'mist_v8'
        - 'bayestar_v5'

    Returns
    -------
    pathlib.Path
        Path to the (symlinked) offsets file.

    Raises
    ------
    ValueError
        If `grid` is not a recognized model grid.
    """
    # NOTE(review): "offsets_mist_v9.txt" is commented out of the pooch
    # registry in brutus/data.py (its Dataverse DOI is still a placeholder),
    # so the default 'mist_v9' path currently fails inside pooch — confirm
    # once the v9 offsets file is published.
    offset_files = {
        "mist_v9": "offsets_mist_v9.txt",
        "mist_v8": "offsets_mist_v8.txt",
        "bayestar_v5": "offsets_bs_v9.txt",
    }
    try:
        name = offset_files[grid]
    except KeyError:
        raise ValueError("The specified grid file does not exist!")

    return _fetch(name, target_dir)
def fetch_nns(target_dir=".", model="c3k"):
    """Download neural network (NN) files to the target directory.

    Parameters
    ----------
    target_dir : str, optional
        Directory in which a symlink to the cached file is created.
        Default is the current directory.
    model : str, optional
        NN model to download. Options include:
        - 'c3k' (default)

    Returns
    -------
    pathlib.Path
        Path to the (symlinked) NN file.

    Raises
    ------
    ValueError
        If `model` is not a recognized NN model.
    """
    if model == "c3k":
        name = "nn_c3k.h5"
    else:
        # Fixed: the previous message said "isochrone file" — a copy-paste
        # from fetch_isos; this function fetches neural-network files.
        raise ValueError("The specified NN model file does not exist!")

    return _fetch(name, target_dir)