Skip to content

Commit

Permalink
Merge pull request #176 from weecology/schema_field
Browse files Browse the repository at this point in the history
  • Loading branch information
ethanwhite authored Apr 1, 2024
2 parents 951bc51 + a43369a commit f012979
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 45 deletions.
40 changes: 40 additions & 0 deletions deepforest_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Config file for DeepForest pytorch module

# Cpu workers for data loaders
# Dataloaders
workers: 1
devices: 1
accelerator: 'gpu'
batch_size: 1

# Model Architecture
architecture: 'retinanet'
num_classes: 1
nms_thresh: 0.05

# Architecture specific params
retinanet:
# Non-max suppression of overlapping predictions
score_thresh: 0.1

train:
csv_file:
root_dir:

# Optimizer initial learning rate
lr: 0.001

# Number of training epochs
epochs: 1
# Useful debugging flag in pytorch lightning, set to True to get a single batch of training to test settings.
fast_dev_run: False
# pin images to GPU memory for fast training. This depends on GPU size and number of images.
preload_images: False

validation:
# callback args
csv_file:
root_dir:
# Intersection over union evaluation
iou_threshold: 0.4
val_accuracy_interval: 20
11 changes: 7 additions & 4 deletions everglades_dryrun_workflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
#SBATCH --job-name=everglades_workflow
#SBATCH [email protected]
#SBATCH --mail-type=FAIL
#SBATCH --gpus=a100:4
#SBATCH --cpus-per-task=5
#SBATCH --gpus=a100:1
#SBATCH --cpus-per-task=3
#SBATCH --mem=200gb
#SBATCH --time=00:40:00
#SBATCH --time=01:30:00
#SBATCH --partition=gpu
#SBATCH --output=/blue/ewhite/everglades/EvergladesTools/logs/everglades_dryrun_workflow.out
#SBATCH --error=/blue/ewhite/everglades/EvergladesTools/logs/everglades_dryrun_workflow.err
Expand All @@ -20,5 +20,8 @@ conda activate EvergladesTools
export TEST_ENV=True

cd /blue/ewhite/everglades/EvergladesTools/Zooniverse

snakemake --unlock
snakemake --printshellcmds --keep-going --cores 5 --resources gpu=4 --rerun-incomplete --latency-wait 10 --use-conda
echo "INFO [$(date "+%Y-%m-%d %H:%M:%S")] Starting Snakemake pipeline"
snakemake --printshellcmds --keep-going --cores 3 --resources gpu=1 --rerun-incomplete --latency-wait 1 --use-conda
echo "INFO [$(date "+%Y-%m-%d %H:%M:%S")] End"
11 changes: 7 additions & 4 deletions everglades_workflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
#SBATCH --job-name=everglades_workflow
#SBATCH [email protected]
#SBATCH --mail-type=FAIL
#SBATCH --gpus=a100:4
#SBATCH --cpus-per-task=10
#SBATCH --mem=200gb
#SBATCH --gpus=a100:1
#SBATCH --cpus-per-task=30
#SBATCH --mem=1200gb
#SBATCH --time=80:00:00
#SBATCH --partition=gpu
#SBATCH --output=/blue/ewhite/everglades/EvergladesTools/logs/everglades_workflow.out
Expand All @@ -19,5 +19,8 @@ ml conda
conda activate EvergladesTools

cd /blue/ewhite/everglades/EvergladesTools/Zooniverse

snakemake --unlock
snakemake --printshellcmds --keep-going --cores 10 --resources gpu=4 --rerun-incomplete --latency-wait 10 --use-conda
echo "INFO [$(date "+%Y-%m-%d %H:%M:%S")] Starting Snakemake pipeline"
snakemake --printshellcmds --keep-going --cores 30 --resources gpu=1 --rerun-incomplete --latency-wait 10 --use-conda
echo "INFO [$(date "+%Y-%m-%d %H:%M:%S")] End"
5 changes: 3 additions & 2 deletions nest_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ def compare_site(gdf):
results = pd.concat(results)
else:
results = pd.DataFrame(columns=[
'matched_xmin', 'matched_ymin', 'xmax', 'matched_ymax', 'label', 'score', 'Date', 'bird_id', 'target_index',
'geometry'
'matched_xmin', 'matched_ymin', 'xmax', 'matched_ymax', 'label', 'score', 'image_path', 'Date', 'bird_id',
'target_index', 'geometry'
])

return results
Expand All @@ -108,6 +108,7 @@ def detect_nests(bird_detection_file, year, site, savedir):
'matched_ymax': 'float',
'label': 'str',
'score': 'float',
'image_path': 'str',
'Site': 'str',
'Date': 'str',
'Year': 'str',
Expand Down
41 changes: 6 additions & 35 deletions predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,36 +8,7 @@
import shapely
import torch
from deepforest import main


def project(raster_path, boxes):
    """
    Convert image coordinates into a geospatial object to overlap with input image.

    Pixel-space bounding boxes from deepforest use a top-left origin (row 0 is the
    top of the image), so y values are subtracted from the raster's top edge while
    x values are added to its left edge.

    Args:
        raster_path: path to the raster .tif on disk. Assumed to have a valid spatial projection
        boxes: a prediction pandas dataframe from deepforest.predict_tile()
    Returns:
        a geopandas dataframe with predictions in input projection.
    """
    with rasterio.open(raster_path) as dataset:
        # Geospatial extent and per-pixel ground resolution of the raster.
        bounds = dataset.bounds
        pixelSizeX, pixelSizeY = dataset.res

        # subtract origin. Recall that numpy origin is top left! Not bottom left.
        # x: scale pixel column by ground resolution, then offset from the left edge.
        boxes["xmin"] = (boxes["xmin"] * pixelSizeX) + bounds.left
        boxes["xmax"] = (boxes["xmax"] * pixelSizeX) + bounds.left
        # y: image rows grow downward, so subtract from the top edge.
        # NOTE(review): this maps the image-space ymin (top of box) to the larger
        # geographic y, so the resulting "ymin" column holds the box top — verify
        # downstream consumers expect this orientation.
        boxes["ymin"] = bounds.top - (boxes["ymin"] * pixelSizeY)
        boxes["ymax"] = bounds.top - (boxes["ymax"] * pixelSizeY)

        # combine column to a shapely Box() object, save shapefile
        boxes['geometry'] = boxes.apply(lambda x: shapely.geometry.box(x.xmin, x.ymin, x.xmax, x.ymax), axis=1)
        boxes = geopandas.GeoDataFrame(boxes, geometry='geometry')

        # Tag the frame with the raster's CRS so exports carry the projection.
        boxes.crs = dataset.crs.to_wkt()

    # Shapefiles could be written with geopandas boxes.to_file(<filename>, driver='ESRI Shapefile')

    return boxes
from deepforest.utilities import boxes_to_shapefile


def run(proj_tile_path, checkpoint_path, savedir="."):
Expand All @@ -57,19 +28,19 @@ def run(proj_tile_path, checkpoint_path, savedir="."):
model.load_state_dict(checkpoint["state_dict"])

boxes = model.predict_tile(raster_path=proj_tile_path, patch_overlap=0, patch_size=1500)
projected_boxes = project(proj_tile_path, boxes)

proj_tile_dir = os.path.dirname(proj_tile_path)
projected_boxes = boxes_to_shapefile(boxes, proj_tile_dir)
if not os.path.exists(savedir):
os.makedirs(savedir)
basename = os.path.splitext(os.path.basename(proj_tile_path))[0]
fn = "{}/{}.shp".format(savedir, basename)
projected_boxes.to_file(fn)

# Write GeoDataFrame to a new shapefile (avoid appending)
projected_boxes.to_file(fn, driver="ESRI Shapefile")
return fn


if __name__ == "__main__":
checkpoint_path = "/blue/ewhite/everglades/Zooniverse//20220910_182547/species_model.pl"
checkpoint_path = "/blue/ewhite/everglades/Zooniverse/20220910_182547/species_model.pl"

path = sys.argv[1]
split_path = os.path.normpath(path).split(os.path.sep)
Expand Down

0 comments on commit f012979

Please sign in to comment.