add USGS backbone
bw4sz committed Jan 16, 2025
1 parent 148fdcf commit c52bcc3
Showing 11 changed files with 155 additions and 49 deletions.
64 changes: 64 additions & 0 deletions USGS_backbone.py
@@ -0,0 +1,64 @@
from deepforest import main
import pandas as pd
import os
import tempfile
import comet_ml
from pytorch_lightning.loggers import CometLogger

df = pd.read_csv("/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/20231116_cropped_annotations.csv")
df.wat_label.value_counts()
df = df[df.wat_label.isin(["Bird","Cartilaginous Fish","Bony Fish","Mammal","Reptile"])]

# Combine Fish classes
df.loc[df.wat_label.isin(["Cartilaginous Fish","Bony Fish"]),"wat_label"] = "Fish"

# Construct padded crop name
df["image_path"] = df["bname_parent"] +"_" + df["tile_xtl"].astype(str) + "_" + df["tile_ytl"].astype(str) + "_" + df["tile_xbr"].astype(str) + "_" + df["tile_ybr"].astype(str) + ".JPG"

# Check if all images exist
df["image_exists"] = df["image_path"].apply(lambda x: os.path.exists(os.path.join("/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/padded",x)))

df["xmin"] = df["xtl"]
df["ymin"] = df["ytl"]
df["xmax"] = df["xbr"]
df["ymax"] = df["ybr"]
df["label"] = df["wat_label"]

# Randomly split 85 / 15 for each class
train = df.groupby("wat_label").sample(frac=0.85)
test = df.drop(train.index)

# Write to tmp data directory
tmpdir = tempfile.mkdtemp()
train.to_csv(os.path.join(tmpdir,"train.csv"),index=False)
test.to_csv(os.path.join(tmpdir,"test.csv"),index=False)

# Initialize a new DeepForest model (the model that you will train) with your classes
m = main.deepforest(config_args={"num_classes":4}, label_dict={"Bird":0,"Fish":1,"Mammal":2,"Reptile":3})

# Initialize the released DeepForest model (the model whose backbone and regression head will be reused)
deepforest_release_model = main.deepforest()
deepforest_release_model.load_model("weecology/deepforest-bird")

# Extract the single-class backbone, whose features are useful for the multi-class model
m.model.backbone.load_state_dict(deepforest_release_model.model.backbone.state_dict())

# Load the regression head into the new model
m.model.head.regression_head.load_state_dict(deepforest_release_model.model.head.regression_head.state_dict())

m.config["train"]["csv_file"] = os.path.join(tmpdir,"train.csv")
m.config["train"]["root_dir"] = "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/padded"
m.config["train"]["fast_dev_run"] = False
m.config["validation"]["csv_file"] = os.path.join(tmpdir,"test.csv")
m.config["validation"]["root_dir"] = "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/padded"
m.config["batch_size"] = 6
m.config["train"]["epochs"] = 25
m.config["validation"]["val_accuracy_interval"] = 5
m.config["train"]["scheduler"]["params"]["eps"] = 0
comet_logger = CometLogger(project_name="BOEM", workspace="bw4sz")

m.create_trainer(logger=comet_logger)
m.trainer.fit(m)

# Save the model
m.trainer.save_checkpoint("/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/checkpoints/{}.pl".format(comet_logger.experiment.id))
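
For reference, a minimal sketch (not part of this commit) of loading the saved checkpoint back for inference; the checkpoint name and image path are placeholders and the patch settings are illustrative:

from deepforest import main

# Hypothetical path; substitute the Comet experiment id written by the script above
ckpt = "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/checkpoints/<experiment_id>.pl"

# deepforest.main.deepforest is a LightningModule, so load_from_checkpoint restores the trained weights
m = main.deepforest.load_from_checkpoint(ckpt)

# Tiled prediction on one padded crop (placeholder filename); returns a DataFrame of boxes, labels, and scores
boxes = m.predict_tile(raster_path="example_crop.JPG", patch_size=1000, patch_overlap=0.05)
print(boxes.head())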
12 changes: 6 additions & 6 deletions conf/config.yaml
@@ -26,20 +26,20 @@ predict:
min_score: 0.4

pipeline:
-confidence_threshold: 0.5
+confidence_threshold: 0.9
limit_empty_frac: 0.01

propagate:
time_threshold_seconds: 5
distance_threshold_pixels: 50

detection_model:
-checkpoint: bird
+checkpoint: "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/checkpoints/5420a9c3f27d4299992094a7b9b49cb7.pl"
checkpoint_dir: /blue/ewhite/b.weinstein/BOEM/detection/checkpoints
train_csv_folder: /blue/ewhite/b.weinstein/BOEM/annotations/train/
train_image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27/annotated
crop_image_dir: /blue/ewhite/b.weinstein/BOEM/detection/crops/
-limit_empty_frac: 0.25
+limit_empty_frac: 0.2
labels:
- "Bird"
trainer:
@@ -49,7 +49,7 @@ detection_model:
lr: 0.0001
workers: 0
validation:
-val_accuracy_interval: 20
+val_accuracy_interval: 3

classification_model:
checkpoint:
@@ -84,7 +84,7 @@ active_learning:
n_images: 50
patch_size: 2000
patch_overlap: 0
-min_score: 0.25
+min_score: 0.1
model_checkpoint:
target_labels:
- "Bird"
@@ -98,7 +98,7 @@ active_learning:
active_testing:
image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27
strategy: 'random'
-n_images: 1
+n_images: 1000
m:
patch_size: 2000
patch_overlap: 0
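
For context on how these values are consumed, a minimal sketch assuming the pipeline reads conf/config.yaml with OmegaConf (src/detection.py already uses OmegaConf and dotted access such as config.detection_model.limit_empty_frac; main.py may instead compose the config via Hydra overrides, as in submit.sh):

from omegaconf import OmegaConf

# Load the configuration shown above (path relative to the repository root)
config = OmegaConf.load("conf/config.yaml")

# Dotted access mirrors how the src modules read these fields
print(config.pipeline.confidence_threshold)   # 0.9 after this commit
print(config.detection_model.checkpoint)      # USGS backbone checkpoint path set above
print(config.active_learning.min_score)       # 0.1 after this commit
print(config.active_testing.n_images)         # 1000 after this commit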
8 changes: 7 additions & 1 deletion src/active_learning.py
@@ -70,10 +70,15 @@ def update_sys_path():
dask_results.append(pd.concat(block_result))
preannotations = pd.concat(dask_results)
else:
-preannotations = detection.predict(m=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap)
+preannotations = detection.predict(m=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap, batch_size=32)
preannotations = pd.concat(preannotations)

# Print the number of preannotations before removing min score
print("There are {} preannotations before removing min score".format(preannotations.shape[0]))
print("There are {} images before removing min score".format(preannotations["image_path"].nunique()))
preannotations = preannotations[preannotations["score"] >= min_score]
print("There are {} preannotations after removing min score".format(preannotations.shape[0]))
print("There are {} images after removing min score".format(preannotations["image_path"].nunique()))

if strategy == "most-detections":
# Sort images by total number of predictions
@@ -158,6 +163,7 @@ def update_sys_path():
preannotations = detection.predict(model=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap)
preannotations = pd.concat(preannotations)

print("There are {} preannotations before removing min score".format(preannotations.shape[0]))
preannotations = preannotations[preannotations["score"] >= min_score]

if strategy == "most-detections":
47 changes: 31 additions & 16 deletions src/detection.py
@@ -59,7 +59,7 @@ def load(checkpoint, annotations = None):

if not annotations is None:
num_labels = len(annotations.label.unique())
-if num_labels != len(snapshot.label_dict):
+if num_labels > len(snapshot.label_dict):
snapshot = extract_backbone(snapshot, annotations)

return snapshot
@@ -135,15 +135,19 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro
"""
tmpdir = tempfile.gettempdir()

# Fix taxonomy
train_annotations = fix_taxonomy(train_annotations)
test_annotations = fix_taxonomy(test_annotations)

train_annotations.to_csv(os.path.join(tmpdir,"train.csv"), index=False)
test_annotations.to_csv(os.path.join(tmpdir,"test.csv"), index=False)

# Set config
model.config["train"]["csv_file"] = os.path.join(tmpdir,"train.csv")
model.config["train"]["root_dir"] = train_image_dir

-#model.config["validation"]["csv_file"] = os.path.join(tmpdir,"test.csv")
-#model.config["validation"]["root_dir"] = train_image_dir
+model.config["validation"]["csv_file"] = os.path.join(tmpdir,"test.csv")
+model.config["validation"]["root_dir"] = train_image_dir

# Override each key in model.config with the corresponding value supplied in config_args
config_args = OmegaConf.to_container(config_args)
@@ -173,13 +177,14 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro
sample_train_annotations_for_image.root_dir = train_image_dir
visualize.plot_annotations(sample_train_annotations_for_image, savedir=tmpdir)
comet_logger.experiment.log_image(os.path.join(tmpdir, filename))
-# with comet_logger.experiment.context_manager("test_images"):
-#     non_empty_train_annotations = read_file(model.config["validation"]["csv_file"], root_dir=train_image_dir)
-#     for filename in non_empty_train_annotations.image_path.sample(5):
-#         sample_train_annotations_for_image = non_empty_train_annotations[non_empty_train_annotations.image_path == filename]
-#         sample_train_annotations_for_image.root_dir = train_image_dir
-#         visualize.plot_annotations(sample_train_annotations_for_image, savedir=tmpdir)
-#         comet_logger.experiment.log_image(os.path.join(tmpdir, filename))

+with comet_logger.experiment.context_manager("test_images"):
+    non_empty_validation_annotations = read_file(model.config["validation"]["csv_file"], root_dir=train_image_dir)
+    for filename in non_empty_validation_annotations.image_path.head(5):
+        sample_validation_annotations_for_image = non_empty_validation_annotations[non_empty_validation_annotations.image_path == filename]
+        sample_validation_annotations_for_image.root_dir = train_image_dir
+        visualize.plot_annotations(sample_validation_annotations_for_image, savedir=tmpdir)
+        comet_logger.experiment.log_image(os.path.join(tmpdir, filename))

model.trainer.fit(model)

@@ -196,6 +201,12 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro

return model

def fix_taxonomy(df):
df["label"] = df.label.replace('Turtle', 'Reptile')
df["label"] = df.label.replace('Cetacean', 'Mammal')

return df

def preprocess_and_train(config, model_type="detection"):
"""Preprocess data and train model.
@@ -208,6 +219,7 @@ def preprocess_and_train(config, model_type="detection"):
# Get and split annotations
train_df = gather_data(config.detection_model.train_csv_folder)
validation = gather_data(config.label_studio.csv_dir_validation)

validation.loc[validation.label==0,"label"] = "Bird"

# Remove the empty frames, using hard mining instead
@@ -239,7 +251,9 @@ def preprocess_and_train(config, model_type="detection"):
if config.detection_model.limit_empty_frac > 0:
train_df = limit_empty_frames(train_df, config.detection_model.limit_empty_frac)
if not validation_df.empty:
-validation_df = limit_empty_frames(validation_df, config.detection_model.limit_empty_frac)
+#validation_df = limit_empty_frames(validation_df, config.detection_model.limit_empty_frac)
+# DeepForest evaluate doesn't work with empty frames yet, see https://github.com/weecology/DeepForest/pull/858
+validation_df = validation_df[validation_df.xmin!=0]


# Train model
@@ -283,32 +297,33 @@ def get_latest_checkpoint(checkpoint_dir, annotations):

return m

-def _predict_list_(image_paths, patch_size, patch_overlap, model_path, m=None, crop_model=None):
+def _predict_list_(image_paths, patch_size, patch_overlap, model_path, m=None, crop_model=None, batch_size=64):
if model_path:
m = load(model_path)
else:
if m is None:
raise ValueError("A model or model_path is required for prediction.")

m.create_trainer(fast_dev_run=False)

m.config["batch_size"] = batch_size
predictions = []
for image_path in image_paths:
-prediction = m.predict_tile(raster_path=image_path, return_plot=False, patch_size=patch_size, patch_overlap=patch_overlap, crop_model=crop_model, verbose=True)
+prediction = m.predict_tile(raster_path=image_path, return_plot=False, patch_size=patch_size, patch_overlap=patch_overlap, crop_model=crop_model)
if prediction is None:
prediction = pd.DataFrame({"image_path": image_path, "xmin": [None], "ymin": [None], "xmax": [None], "ymax": [None], "label": [None], "score": [None]})
predictions.append(prediction)

return predictions

-def predict(image_paths, patch_size, patch_overlap, m=None, model_path=None, dask_client=None, crop_model=None):
+def predict(image_paths, patch_size, patch_overlap, m=None, model_path=None, dask_client=None, crop_model=None, batch_size=8):
"""Predict bounding boxes for images
Args:
m (main.deepforest): A trained deepforest model.
image_paths (list): A list of image paths.
crop_model (main.deepforest): A trained deepforest model for classification.
model_path (str): The path to a model checkpoint.
dask_client (dask.distributed.Client): A dask client for parallel prediction.
batch_size (int): The batch size for prediction.
Returns:
list: A list of image predictions.
"""
@@ -337,6 +352,6 @@ def update_sys_path():
block_result = block_result.result()
predictions.append(pd.concat(block_result))
else:
-predictions = _predict_list_(image_paths=image_paths, patch_size=patch_size, patch_overlap=patch_overlap, model_path=model_path, m=m, crop_model=crop_model)
+predictions = _predict_list_(image_paths=image_paths, patch_size=patch_size, patch_overlap=patch_overlap, model_path=model_path, m=m, crop_model=crop_model, batch_size=batch_size)

return predictions
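
A short usage sketch of the updated predict entry point with the new batch_size argument; the import path, checkpoint path, and image names are placeholders, while patch_size and patch_overlap mirror the values in conf/config.yaml:

import pandas as pd

from src import detection  # assumed module path within this repository

# Hypothetical inputs
image_paths = ["flight_0001.JPG", "flight_0002.JPG"]
predictions = detection.predict(
    image_paths=image_paths,
    patch_size=2000,
    patch_overlap=0,
    model_path="/blue/ewhite/b.weinstein/BOEM/detection/checkpoints/latest.ckpt",  # placeholder checkpoint
    batch_size=32,
)

# predict returns a list of per-image DataFrames
combined = pd.concat(predictions)
print(combined.groupby("label").size())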
7 changes: 4 additions & 3 deletions src/pipeline.py
@@ -165,8 +165,8 @@ def run(self):
min_score=self.config.active_learning.min_score
)

print(f"Images requiring human review: {len(confident_predictions)}")
print(f"Images auto-annotated: {len(uncertain_predictions)}")
print(f"Images requiring human review: {len(uncertain_predictions)}")
print(f"Images auto-annotated: {len(confident_predictions)}")

# Intelligent cropping
image_paths = uncertain_predictions["image_path"].unique()
@@ -199,6 +199,7 @@ def run(self):
uncertain_predictions=uncertain_predictions,
pipeline_monitor=pipeline_monitor)

-reporter.generate_report()
+reporter.generate_report(create_video=True)
else:
print("No images to annotate")

1 change: 1 addition & 0 deletions src/pipeline_evaluation.py
@@ -128,6 +128,7 @@ def predict_classification(self):
image_paths=full_image_paths,
patch_size=self.patch_size,
patch_overlap=self.patch_overlap,
batch_size=32
)
combined_predictions = pd.concat(predictions)
self.predictions.append(combined_predictions)
5 changes: 3 additions & 2 deletions src/reporting.py
@@ -50,14 +50,15 @@ def concat_predictions(self):
"""
self.all_predictions = pd.concat(self.pipeline_monitor.predictions, ignore_index=True)

-def generate_report(self):
+def generate_report(self, create_video=False):
"""Generate a report"""

if self.pipeline_monitor:
self.concat_predictions()
self.write_predictions()
self.write_metrics()
-self.generate_video()
+if create_video:
+    self.generate_video()

def write_predictions(self):
"""Write predictions to a csv file"""
2 changes: 1 addition & 1 deletion submit.sh
@@ -15,4 +15,4 @@
source activate BOEM

cd ~/BOEM/
-python main.py check_annotations=True active_learning.pool_limit=20000 active_testing.n_images=1 active_learning.n_images=30 detection_model.trainer.epochs = 20
+python main.py check_annotations=True active_learning.pool_limit=10000 active_testing.n_images=1 active_learning.n_images=100 pipeline_evaluation.debug=False
18 changes: 18 additions & 0 deletions submit_USGS.sh
@@ -0,0 +1,18 @@
#!/bin/bash
#SBATCH --job-name=BOEM # Job name
#SBATCH --mail-type=END # Mail events
#SBATCH [email protected] # Where to send mail
#SBATCH --account=ewhite
#SBATCH --nodes=1                 # Number of MPI ranks
#SBATCH --cpus-per-task=1
#SBATCH --mem=150GB
#SBATCH --time=48:00:00 #Time limit hrs:min:sec
#SBATCH --output=/home/b.weinstein/logs/BOEM%j.out # Standard output and error log
#SBATCH --error=/home/b.weinstein/logs/BOEM%j.err
#SBATCH --partition=gpu
#SBATCH --gpus=1

source activate BOEM

cd ~/BOEM/
python USGS_backbone.py
25 changes: 12 additions & 13 deletions tests/conftest.py
@@ -40,23 +40,22 @@ def config(tmpdir_factory):
# Create sample bounding box annotations
train_data = {
'image_path': ['empty.jpg', 'birds.jpg', "birds.jpg"],
-'xmin': [0, 200, 150],
-'ymin': [0, 300, 250],
-'xmax': [0, 300, 250],
-'ymax': [0, 400, 350],
-'label': ['Bird', 'Bird', 'Bird2'],
+'xmin': [20, 200, 150],
+'ymin': [10, 300, 250],
+'xmax': [40, 300, 250],
+'ymax': [20, 400, 350],
+'label': ['FalsePositive', 'Bird', 'Bird2'],
'annotator': ['test_user', 'test_user', 'test_user']
}

val_data = {
-'image_path': ['birds_val.jpg', 'birds_val.jpg'],
-'xmin': [150, 150],
-'ymin': [250, 250],
-'xmax': [250, 250],
-'ymax': [350, 350],
-'label': ['Bird', 'Bird2'],
-'annotator': ['test_user', 'test_user'],
-"score": [0.9, 0.8]
+'image_path': ['empty.jpg','birds_val.jpg', 'birds_val.jpg'],
+'xmin': [None,150, 150],
+'ymin': [None,250, 250],
+'xmax': [None,250, 250],
+'ymax': [None,350, 350],
+'label': ['Bird','Bird', 'Bird2'],
+'annotator': ['test_user','test_user', 'test_user'],
}

metadata = {
