From 2c4c5e203a934c2f56d0d25e8f2f8b362c8ea68d Mon Sep 17 00:00:00 2001 From: ritvik Date: Mon, 30 Dec 2024 13:09:42 -0500 Subject: [PATCH] updates --- geoprepare/__init__.py | 4 +- geoprepare/base.py | 46 ++++- .../{VHI.py => diagnostics/__init__.py} | 0 geoprepare/diagnostics/count_files.py | 47 +++++ geoprepare/extract/stats.py | 8 + geoprepare/extract/util/raster/__init__.py | 2 + geoprepare/geoextract.txt | 174 +++++++++++++++++- setup.py | 2 +- 8 files changed, 276 insertions(+), 7 deletions(-) rename geoprepare/{VHI.py => diagnostics/__init__.py} (100%) create mode 100644 geoprepare/diagnostics/count_files.py create mode 100644 geoprepare/extract/util/raster/__init__.py diff --git a/geoprepare/__init__.py b/geoprepare/__init__.py index 7ebaaf1..11b84fd 100644 --- a/geoprepare/__init__.py +++ b/geoprepare/__init__.py @@ -2,6 +2,6 @@ __author__ = """Ritvik Sahajpal""" __email__ = "ritvik@umd.edu" -__version__ = "0.1.2" +__version__ = "0.6.0" -__all__ = ["log", "utils.py", "base", "geoprepare", "geoextract"] +__all__ = ["log", "utils", "base", "geodownload", "geoextract", "geomerge", "diagnostics"] diff --git a/geoprepare/base.py b/geoprepare/base.py index 1b58bac..53fe305 100644 --- a/geoprepare/base.py +++ b/geoprepare/base.py @@ -116,9 +116,52 @@ def get_dirname(self, country): f"crop_t{self.limit}" if self.threshold else f"crop_p{self.limit}" ) + def get_key_or_value(self, input_item): + """ + If input_item is a key, return its value. + If input_item is a value, return its key. + Otherwise, return None. + """ + mapping = { + "mz": "maize", + "sb": "soybean", + "rc": "rice", + "sw": "spring_wheat", + "ww": "winter_wheat", + "ml": "millet", + "tf": "teff", + "sr": "sorghum" + } + + # Check if the input item is a key in the mapping + if input_item in mapping: + return mapping[input_item] + + # Check if the input item is a value in the inverse mapping + inverse_mapping = {v: k for k, v in mapping.items()} + + if input_item in inverse_mapping: + return inverse_mapping[input_item] + + # If it's neither a key nor a value + return None + + def get_calendar_sheet_name(self, crop, growing_season): + sheet_name = None + crop_name = self.get_key_or_value(crop) + + if crop in ["winter_wheat", "spring_wheat"]: + sheet_name = crop_name + else: + sheet_name = f"{crop_name}_{growing_season}" + + return sheet_name + def read_statistics( self, country, + crop="mz", + growing_season=1, read_calendar=False, read_statistics=False, read_countries=False, @@ -135,13 +178,14 @@ def read_statistics( # Get crop calendar information if read_calendar or read_all: + sheet_name = self.get_calendar_sheet_name(crop, growing_season) self.path_calendar = ( self.dir_input / "crop_calendars" / self.parser.get(category, "calendar_file") ) self.df_calendar = ( - pd.read_csv(self.path_calendar) + pd.read_excel(self.path_calendar, sheet_name=sheet_name) if os.path.isfile(self.path_calendar) else pd.DataFrame() ) diff --git a/geoprepare/VHI.py b/geoprepare/diagnostics/__init__.py similarity index 100% rename from geoprepare/VHI.py rename to geoprepare/diagnostics/__init__.py diff --git a/geoprepare/diagnostics/count_files.py b/geoprepare/diagnostics/count_files.py new file mode 100644 index 0000000..1ca02ea --- /dev/null +++ b/geoprepare/diagnostics/count_files.py @@ -0,0 +1,47 @@ +import os +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +from tqdm import tqdm + +# Base path of the directory +base_path = "/gpfs/data1/cmongp1/GEOGLAM/Output/FEWSNET/crop_t20" + +# Get a list of all countries (directories in the base path) +countries = [d for d in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, d))] + +# Folder names (as seen in the image) +folder_names = ["chirps", "chirps_gefs", "cpc_tmax", "cpc_tmin", "esi_4wk", "ndvi", "nsidc_rootzone", "nsidc_surface"] + +# Data collection +file_counts = [] + +for country in tqdm(countries, desc="Countries"): + # Dynamically find the deepest subdirectory (replace 'admin_1/cr' with auto-detection) + country_path = os.path.join(base_path, country) + subdirs = [os.path.join(root, d) for root, dirs, _ in os.walk(country_path) for d in dirs if "cr" in d] + if subdirs: + folder_path_root = subdirs[0] # Assuming there's only one relevant "cr" path + else: + folder_path_root = country_path # If no "cr" directory exists, fallback to the base country directory + + counts = [] + for folder in tqdm(folder_names, desc="Variables", leave=False): + folder_path = os.path.join(folder_path_root, folder) + if os.path.exists(folder_path): + counts.append(len([f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))])) + else: + counts.append(0) + file_counts.append(counts) + +# Create DataFrame +df = pd.DataFrame(file_counts, index=countries, columns=folder_names) + +# Plot heatmap +plt.figure(figsize=(12, 8)) +sns.heatmap(df, annot=True, fmt="d", cmap="YlGnBu") +plt.title("Number of Files in Subfolders by Country") +plt.xlabel("Folder Names") +plt.ylabel("Countries") +plt.tight_layout() +plt.savefig("count_files.png", dpi=300) diff --git a/geoprepare/extract/stats.py b/geoprepare/extract/stats.py index 3b4271f..8224d3f 100644 --- a/geoprepare/extract/stats.py +++ b/geoprepare/extract/stats.py @@ -59,6 +59,14 @@ def get_var(var: str, indicator_arr: np.ndarray) -> ma.MaskedArray: (x < 0.0) | (x == 9999.0), x ), + "nsidc_surface": lambda x: ma.masked_where( + (x < 0.0) | (x == 9999.0), + x + ), + "nsidc_rootzone": lambda x: ma.masked_where( + (x < 0.0) | (x == 9999.0), + x + ), "chirps": lambda x: ma.masked_less(x, 0.0) / 100.0, "chirps_gefs": lambda x: ma.masked_less(x, 0.0) / 100.0, "cpc_tmax": lambda x: ma.masked_less(x, -273.15), diff --git a/geoprepare/extract/util/raster/__init__.py b/geoprepare/extract/util/raster/__init__.py new file mode 100644 index 0000000..541511d --- /dev/null +++ b/geoprepare/extract/util/raster/__init__.py @@ -0,0 +1,2 @@ +from .array import * +from .raster import * diff --git a/geoprepare/geoextract.txt b/geoprepare/geoextract.txt index 1b6ebc1..5954355 100644 --- a/geoprepare/geoextract.txt +++ b/geoprepare/geoextract.txt @@ -121,126 +121,293 @@ crops = ['rc'] ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; EWCM ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +[afghanistan] +category = EWCM +scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = gaul1_asap_v04.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] + [angola] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] + +[bangladesh] +category = EWCM +scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = gaul1_asap_v04.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [benin] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [burkina_faso] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [burundi] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [cameroon] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [central_african_republic] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [chad] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [drc] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [ethiopia] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [guinea] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [kenya] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [lesotho] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [liberia] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [madagascar] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [malawi] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [mali] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [mauritania] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [mozambique] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [niger] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [rwanda] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [senegal] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [sierra_leone] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [somalia] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [south_africa] category = EWCM scales = ['admin_1'] crops = ['mz', 'ww'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx [south_sudan] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [sudan] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] -[tanzania] +[united_republic_of_tanzania] category = EWCM scales = ['admin_1'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [uganda] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [zambia] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] [zimbabwe] category = EWCM scales = ['admin_2'] +growing_seasons = [1] +use_cropland_mask = True +shp_boundary = adm_shapefile.shp +calendar_file = EWCM_2024-10-16.xlsx +crops = ['mz', 'sr', 'ml', 'rc', 'ww', 'tf'] ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Crops ;; @@ -292,7 +459,7 @@ redo = False threshold = True floor = 20 ceil = 90 -countries = ['angola', 'benin'] +countries = ['angola'] crops = ['ww', 'sb', 'sw', 'rc', 'mz'] shp_boundary = gaul1_asap_v04.shp category = AMIS @@ -304,7 +471,7 @@ use_cropland_mask = False statistics_file = statistics.csv zone_file = countries.csv calendar_file = crop_calendar.csv -eo_model = ['ndvi', 'cpc_tmax', 'cpc_tmin', 'chirps', 'chirps_gefs', 'esi_4wk', 'soil_moisture_as1', 'soil_moisture_as2'] +eo_model = ['nsidc_surface', 'nsidc_rootzone', 'ndvi', 'cpc_tmax', 'cpc_tmin', 'chirps', 'chirps_gefs', 'esi_4wk'] ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Project ;; @@ -312,3 +479,4 @@ eo_model = ['ndvi', 'cpc_tmax', 'cpc_tmin', 'chirps', 'chirps_gefs', 'esi_4wk', [PROJECT] project_name = FEWSNET parallel_extract = False +parallel_merge = False diff --git a/setup.py b/setup.py index b63e8d2..e4c3de6 100644 --- a/setup.py +++ b/setup.py @@ -50,6 +50,6 @@ test_suite="tests", tests_require=test_requirements, url="https://github.com/ritviksahajpal/geoprepare", - version="0.5.87", + version="0.6.1", zip_safe=False, )