Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
ritviksahajpal committed Dec 30, 2024
1 parent db26c40 commit 2c4c5e2
Show file tree
Hide file tree
Showing 8 changed files with 276 additions and 7 deletions.
4 changes: 2 additions & 2 deletions geoprepare/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

# Package metadata: author name and contact address.
__author__ = """Ritvik Sahajpal"""
__email__ = "[email protected]"
# NOTE(review): two consecutive assignments follow; this looks like the
# removed and added lines of a diff shown together. If executed as written,
# the later value ("0.6.0") is the one that remains bound.
__version__ = "0.1.2"
__version__ = "0.6.0"

# Names exported by `from <package> import *`.
# NOTE(review): same removed/added pattern as above; the second list is the
# one that remains bound. The first list contains "utils.py", which is a file
# name rather than an importable name.
__all__ = ["log", "utils.py", "base", "geoprepare", "geoextract"]
__all__ = ["log", "utils", "base", "geodownload", "geoextract", "geomerge", "diagnostics"]
46 changes: 45 additions & 1 deletion geoprepare/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,52 @@ def get_dirname(self, country):
f"crop_t{self.limit}" if self.threshold else f"crop_p{self.limit}"
)

def get_key_or_value(self, input_item):
    """
    Translate between a short crop code and its full crop name.

    The lookup works in both directions:
    a known code (e.g. "mz") yields its full name ("maize"), and
    a known full name (e.g. "maize") yields its code ("mz").

    Args:
        input_item: Crop code or full crop name to translate.

    Returns:
        The counterpart string, or None when input_item is neither a
        known code nor a known full name.
    """
    code_to_name = {
        "mz": "maize",
        "sb": "soybean",
        "rc": "rice",
        "sw": "spring_wheat",
        "ww": "winter_wheat",
        "ml": "millet",
        "tf": "teff",
        "sr": "sorghum",
    }

    # Forward direction first: code -> full name. No stored name is None,
    # so a None result here reliably means "not a known code".
    full_name = code_to_name.get(input_item)
    if full_name is not None:
        return full_name

    # Reverse direction: full name -> code. A miss falls through to None.
    name_to_code = dict(zip(code_to_name.values(), code_to_name.keys()))
    return name_to_code.get(input_item)

def get_calendar_sheet_name(self, crop, growing_season):
    """
    Build the crop-calendar sheet name for a crop and growing season.

    The crop identifier is first translated with ``self.get_key_or_value``
    (short code -> full name, or full name -> short code). Winter and
    spring wheat use the translated identifier alone as the sheet name;
    every other crop gets ``_<growing_season>`` appended.

    Wheat is recognised in either form. Previously only the full names
    ("winter_wheat", "spring_wheat") were matched against ``crop``, so a
    short code such as "ww" was translated to "winter_wheat" but still
    received the season suffix ("winter_wheat_1").

    Args:
        crop: Crop short code (e.g. "mz", "ww") or full crop name.
        growing_season: Season identifier appended for non-wheat crops.

    Returns:
        The sheet name as a string. If ``get_key_or_value`` does not
        recognise ``crop`` it returns None, and the text "None" ends up in
        the resulting sheet name.
    """
    wheat_names = ("winter_wheat", "spring_wheat")
    crop_name = self.get_key_or_value(crop)

    # Check both the raw and the translated identifier so that wheat is
    # detected whether the caller passed a full name or a short code.
    if crop in wheat_names or crop_name in wheat_names:
        return crop_name

    return f"{crop_name}_{growing_season}"

def read_statistics(
self,
country,
crop="mz",
growing_season=1,
read_calendar=False,
read_statistics=False,
read_countries=False,
Expand All @@ -135,13 +178,14 @@ def read_statistics(

# Get crop calendar information
if read_calendar or read_all:
sheet_name = self.get_calendar_sheet_name(crop, growing_season)
self.path_calendar = (
self.dir_input
/ "crop_calendars"
/ self.parser.get(category, "calendar_file")
)
self.df_calendar = (
pd.read_csv(self.path_calendar)
pd.read_excel(self.path_calendar, sheet_name=sheet_name)
if os.path.isfile(self.path_calendar)
else pd.DataFrame()
)
Expand Down
File renamed without changes.
47 changes: 47 additions & 0 deletions geoprepare/diagnostics/count_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Root directory whose immediate sub-directories are treated as countries.
base_path = "/gpfs/data1/cmongp1/GEOGLAM/Output/FEWSNET/crop_t20"

# Every directory directly under the base path counts as one country.
countries = [
    name
    for name in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, name))
]

# Variable sub-folders to count files in; one heatmap column per entry.
folder_names = [
    "chirps",
    "chirps_gefs",
    "cpc_tmax",
    "cpc_tmin",
    "esi_4wk",
    "ndvi",
    "nsidc_rootzone",
    "nsidc_surface",
]

# One row of per-variable counts per country, in the order of `countries`.
file_counts = []

for country in tqdm(countries, desc="Countries"):
    country_path = os.path.join(base_path, country)

    # Gather every directory below the country whose name contains "cr"
    # (auto-detection in place of a hard-coded 'admin_1/cr' style path).
    subdirs = [
        os.path.join(parent, child)
        for parent, children, _ in os.walk(country_path)
        for child in children
        if "cr" in child
    ]
    # Use the first match (assumes only one relevant "cr" path exists);
    # with no match, look for the variable folders directly in the country.
    folder_path_root = subdirs[0] if subdirs else country_path

    counts = []
    for folder in tqdm(folder_names, desc="Variables", leave=False):
        folder_path = os.path.join(folder_path_root, folder)
        if not os.path.exists(folder_path):
            # A missing variable folder is reported as zero files.
            counts.append(0)
            continue
        # Count regular files only; nested directories are ignored.
        counts.append(
            sum(
                os.path.isfile(os.path.join(folder_path, item))
                for item in os.listdir(folder_path)
            )
        )
    file_counts.append(counts)

# Table of counts: rows are countries, columns are variable folders.
df = pd.DataFrame(file_counts, index=countries, columns=folder_names)

# Render the table as an annotated heatmap and write it to the working directory.
plt.figure(figsize=(12, 8))
sns.heatmap(df, annot=True, fmt="d", cmap="YlGnBu")
plt.title("Number of Files in Subfolders by Country")
plt.xlabel("Folder Names")
plt.ylabel("Countries")
plt.tight_layout()
plt.savefig("count_files.png", dpi=300)
8 changes: 8 additions & 0 deletions geoprepare/extract/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ def get_var(var: str, indicator_arr: np.ndarray) -> ma.MaskedArray:
(x < 0.0) | (x == 9999.0),
x
),
"nsidc_surface": lambda x: ma.masked_where(
(x < 0.0) | (x == 9999.0),
x
),
"nsidc_rootzone": lambda x: ma.masked_where(
(x < 0.0) | (x == 9999.0),
x
),
"chirps": lambda x: ma.masked_less(x, 0.0) / 100.0,
"chirps_gefs": lambda x: ma.masked_less(x, 0.0) / 100.0,
"cpc_tmax": lambda x: ma.masked_less(x, -273.15),
Expand Down
2 changes: 2 additions & 0 deletions geoprepare/extract/util/raster/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .array import *
from .raster import *
Loading

0 comments on commit 2c4c5e2

Please sign in to comment.