Commit: false positive checking
bw4sz committed Dec 17, 2024
1 parent e17de33 commit 148fdcf
Showing 7 changed files with 92 additions and 60 deletions.
20 changes: 12 additions & 8 deletions conf/config.yaml
@@ -7,7 +7,9 @@ comet:

check_annotations: true
# Force upload bypasses the pipeline, useful for debugging and starting a new project
-force_upload: true
+force_upload: False
+force_training: True

label_studio:
  project_name_train: "Bureau of Ocean Energy Management - Training"
  project_name_validation: "Bureau of Ocean Energy Management - Validation"
@@ -37,15 +39,17 @@ detection_model:
train_csv_folder: /blue/ewhite/b.weinstein/BOEM/annotations/train/
train_image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27/annotated
crop_image_dir: /blue/ewhite/b.weinstein/BOEM/detection/crops/
-limit_empty_frac: 0.05
+limit_empty_frac: 0.25
labels:
- "Bird"
-trainer:
+trainer:
+  train:
fast_dev_run: False
-epochs: 3
-lr: 0.00001
+epochs: 10
+lr: 0.0001
workers: 0
+validation:
+  val_accuracy_interval: 20

classification_model:
checkpoint:
@@ -56,8 +60,8 @@ classification_model:
under_sample_ratio: 0
trainer:
fast_dev_run: False
-max_epochs: 4
-lr: 0.001
+max_epochs: 1
+lr: 0.00001

pipeline_evaluation:
detect_ground_truth_dir: /blue/ewhite/b.weinstein/BOEM/annotations/validation
@@ -80,7 +84,7 @@ active_learning:
n_images: 50
patch_size: 2000
patch_overlap: 0
-min_score: 0.3
+min_score: 0.25
model_checkpoint:
target_labels:
- "Bird"
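For orientation, a minimal sketch of how the reorganized detection trainer block reads back. It assumes the project loads conf/config.yaml through OmegaConf/Hydra, which the bare key=value overrides in submit.sh below suggest (each override is a single token, with no spaces around the equals sign):

```python
# Minimal sketch, assuming OmegaConf loading; values reflect this commit.
from omegaconf import OmegaConf

cfg = OmegaConf.load("conf/config.yaml")
print(cfg.force_upload, cfg.force_training)                          # False True
print(cfg.detection_model.trainer.train.epochs)                      # 10
print(cfg.detection_model.trainer.validation.val_accuracy_interval)  # 20
```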
5 changes: 3 additions & 2 deletions src/active_learning.py
@@ -27,7 +27,8 @@ def choose_train_images(evaluation, image_dir, strategy, n=10, patch_size=512, p
Returns:
list: A list of image paths.
"""
-pool = glob.glob(os.path.join(image_dir,"*")) # Get all images in the data directory
+pool = glob.glob(os.path.join(image_dir,"*.jpg")) # Get all images in the data directory

# Remove .csv files from the pool
pool = [image for image in pool if not image.endswith('.csv')]

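A side note on the new pattern: glob matching is case-sensitive on most filesystems, so "*.jpg" passes over files named with an uppercase ".JPG". A small illustrative sketch of a case-insensitive variant (the helper is hypothetical, not from this repository):

```python
# Hypothetical helper, not in the repo: collect JPEGs regardless of extension case.
import glob
import os

def list_jpgs(image_dir):
    pool = glob.glob(os.path.join(image_dir, "*"))
    return [p for p in pool if p.lower().endswith(".jpg")]
```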
@@ -81,7 +82,7 @@ def update_sys_path():
if target_labels is None:
raise ValueError("Target labels are required for the 'target-labels' strategy.")
# Filter images by target labels
-chosen_images = preannotations[preannotations.label.isin(target_labels)].groupby("image_path").size().sort_values(ascending=False).head(n).index.tolist()
+chosen_images = preannotations[preannotations.label.isin(target_labels)].groupby("image_path")["score"].mean().sort_values(ascending=False).head(n).index.tolist()
else:
raise ValueError("Invalid strategy. Must be one of 'random', 'most-detections', or 'target-labels'.")
# Get full path
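The 'target-labels' strategy above switches from ranking images by detection count (`.size()`) to ranking by mean confidence (`["score"].mean()`). A self-contained toy comparison of the two orderings:

```python
import pandas as pd

# Toy preannotations: two low-confidence detections in a.jpg, one
# high-confidence detection in b.jpg.
preannotations = pd.DataFrame({
    "image_path": ["a.jpg", "a.jpg", "b.jpg"],
    "label": ["Bird", "Bird", "Bird"],
    "score": [0.4, 0.5, 0.9],
})

by_count = preannotations.groupby("image_path").size().sort_values(ascending=False)
by_mean_score = preannotations.groupby("image_path")["score"].mean().sort_values(ascending=False)

print(by_count.index.tolist())       # ['a.jpg', 'b.jpg'] -- old: most detections first
print(by_mean_score.index.tolist())  # ['b.jpg', 'a.jpg'] -- new: highest mean score first
```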
40 changes: 28 additions & 12 deletions src/data_processing.py
@@ -2,6 +2,7 @@
import os
from logging import warn
from deepforest import preprocess
+from deepforest.utilities import read_file
from typing import Optional, Union, List, Dict
import numpy as np
from scipy.spatial import ConvexHull
@@ -52,7 +53,8 @@ def preprocess_images(
save_dir: str,
limit_empty_frac: float = 0.1,
patch_size: int = 450,
-patch_overlap: int = 0
+patch_overlap: int = 0,
+allow_empty: bool = False
) -> pd.DataFrame:
"""
Cut images into GPU-friendly chunks and process annotations accordingly.
@@ -68,6 +70,7 @@
limit_empty_frac: Maximum fraction of empty patches to keep
patch_size: Size of the output patches in pixels
patch_overlap: Overlap between patches in pixels
+allow_empty: Whether to allow patches without annotations
Returns:
DataFrame containing annotations for the processed image patches
@@ -88,11 +91,6 @@

for image_path in annotations.image_path.unique():
annotation_df = annotations[annotations.image_path == image_path]

-if annotation_df.empty:
-    allow_empty = True
-else:
-    allow_empty = False

crop_annotation = process_image(
image_path=image_path,
@@ -144,12 +142,6 @@ def process_image(

full_path = os.path.join(root_dir, image_path)

-# Check if all xmin values are 0, indicating empty annotations
-if annotation_df is not None and all(annotation_df['xmin'] == 0):
-    allow_empty = True
-else:
-    allow_empty = False

crop_annotation = preprocess.split_raster(
path_to_raster=full_path,
annotations_file=annotation_df,
@@ -159,7 +151,31 @@
root_dir=root_dir,
allow_empty=allow_empty
)

+# Split FalsePositives out for hard negative mining, for those images with no true positives
+true_positive_images = crop_annotation.loc[crop_annotation.label != "FalsePositive", "image_path"].unique()
+
+# Remove any FalsePositives that are in true positive images
+crop_annotation = crop_annotation.loc[~((crop_annotation.label == "FalsePositive") & (crop_annotation.image_path.isin(true_positive_images))), :]
+crop_annotation.loc[crop_annotation.label == "FalsePositive", ["xmin", "ymin", "xmax", "ymax"]] = 0
+crop_annotation["label"] = crop_annotation["label"].astype(str)
+crop_annotation.loc[crop_annotation.label == "FalsePositive", "label"] = "Bird"
+crop_annotation.loc[crop_annotation.label == "0", "label"] = "Bird"
+
+# Update geometry for FalsePositives
+crop_annotation.drop(columns=["geometry"], inplace=True)
+crop_annotation = pd.DataFrame(crop_annotation)
+crop_annotation = read_file(crop_annotation)
+
+# Remove duplicates
+crop_annotation = crop_annotation.drop_duplicates(subset=["image_path", "xmin", "ymin", "xmax", "ymax", "label"])
+
+# Remove FalsePositives that are in true positive images
+crop_annotation = crop_annotation.loc[~((crop_annotation.label == "FalsePositive") & (crop_annotation.image_path.isin(true_positive_images))), :]
+
+# Save over the original csv
+crop_annotation.to_csv(crop_csv, index=False)

if annotation_df is None:
empty_annotations = []
for i in range(len(crop_annotation)):
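In short, the new block keeps a false positive only when its image contains no true detections, then converts it into an empty hard-negative frame. A toy walk-through of that bookkeeping (data is illustrative):

```python
import pandas as pd

# crop_1 has a real Bird plus a FalsePositive; crop_2 has only a FalsePositive.
crops = pd.DataFrame({
    "image_path": ["crop_1.jpg", "crop_1.jpg", "crop_2.jpg"],
    "xmin": [10, 30, 50], "ymin": [10, 30, 50],
    "xmax": [20, 40, 60], "ymax": [20, 40, 60],
    "label": ["Bird", "FalsePositive", "FalsePositive"],
})

true_positive_images = crops.loc[crops.label != "FalsePositive", "image_path"].unique()

# Drop FalsePositives on images that also contain true detections...
crops = crops[~((crops.label == "FalsePositive") & (crops.image_path.isin(true_positive_images)))]

# ...and turn the survivors into empty "Bird" frames with zeroed boxes.
crops.loc[crops.label == "FalsePositive", ["xmin", "ymin", "xmax", "ymax"]] = 0
crops.loc[crops.label == "FalsePositive", "label"] = "Bird"

print(crops)  # crop_1 keeps its real box; crop_2 becomes an all-zero hard negative
```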
66 changes: 40 additions & 26 deletions src/detection.py
@@ -136,11 +136,15 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro
tmpdir = tempfile.gettempdir()

train_annotations.to_csv(os.path.join(tmpdir,"train.csv"), index=False)
+test_annotations.to_csv(os.path.join(tmpdir,"test.csv"), index=False)

# Set config
model.config["train"]["csv_file"] = os.path.join(tmpdir,"train.csv")
model.config["train"]["root_dir"] = train_image_dir

+#model.config["validation"]["csv_file"] = os.path.join(tmpdir,"test.csv")
+#model.config["validation"]["root_dir"] = train_image_dir

# Loop through all keys in model.config and set them to the value of the key in model.config
config_args = OmegaConf.to_container(config_args)
if config_args:
@@ -162,24 +166,20 @@
else:
model.create_trainer()

-# with comet_logger.experiment.context_manager("train_images"):
-# non_empty_train_annotations = train_annotations[~(train_annotations.xmax==0)]
-# try:
-# non_empty_train_annotations= gpd.GeoDataFrame(non_empty_train_annotations, geometry=non_empty_train_annotations["geometry"])
-# non_empty_train_annotations.root_dir = train_image_dir
-# non_empty_train_annotations = read_file(non_empty_train_annotations)
-# except:
-# non_empty_train_annotations = read_file(non_empty_train_annotations, root_dir=train_image_dir)
-
-# if non_empty_train_annotations.empty:
-# pass
-# else:
-# sample_train_annotations = non_empty_train_annotations[non_empty_train_annotations.image_path.isin(non_empty_train_annotations.image_path.head(5))]
-# for filename in sample_train_annotations.image_path:
-# sample_train_annotations_for_image = sample_train_annotations[sample_train_annotations.image_path == filename]
-# sample_train_annotations_for_image.root_dir = train_image_dir
-# visualize.plot_results(sample_train_annotations_for_image, savedir=tmpdir)
-# comet_logger.experiment.log_image(os.path.join(tmpdir, filename))
+with comet_logger.experiment.context_manager("train_images"):
+non_empty_train_annotations = read_file(model.config["train"]["csv_file"], root_dir=train_image_dir)
+for filename in non_empty_train_annotations.image_path.sample(5):
+sample_train_annotations_for_image = non_empty_train_annotations[non_empty_train_annotations.image_path == filename]
+sample_train_annotations_for_image.root_dir = train_image_dir
+visualize.plot_annotations(sample_train_annotations_for_image, savedir=tmpdir)
+comet_logger.experiment.log_image(os.path.join(tmpdir, filename))
+# with comet_logger.experiment.context_manager("test_images"):
+# non_empty_train_annotations = read_file(model.config["validation"]["csv_file"], root_dir=train_image_dir)
+# for filename in non_empty_train_annotations.image_path.sample(5):
+# sample_train_annotations_for_image = non_empty_train_annotations[non_empty_train_annotations.image_path == filename]
+# sample_train_annotations_for_image.root_dir = train_image_dir
+# visualize.plot_annotations(sample_train_annotations_for_image, savedir=tmpdir)
+# comet_logger.experiment.log_image(os.path.join(tmpdir, filename))

model.trainer.fit(model)

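The now-active loop above renders five sample training images and uploads them to Comet. A self-contained sketch of the same pattern; Experiment.context_manager and log_image are real comet_ml APIs, while the annotate callback stands in for deepforest.visualize.plot_annotations:

```python
# Sketch of the sample-image logging pattern; `annotate` is a placeholder for
# deepforest.visualize.plot_annotations, everything else is standard comet_ml.
import os
import tempfile
import pandas as pd
from comet_ml import Experiment

def log_samples(annotations: pd.DataFrame, experiment: Experiment, annotate, n: int = 5):
    tmpdir = tempfile.gettempdir()
    with experiment.context_manager("train_images"):
        # .sample(n) draws rows, so a path could repeat; de-duplicate first.
        for filename in annotations.image_path.drop_duplicates().sample(n):
            subset = annotations[annotations.image_path == filename]
            annotate(subset, savedir=tmpdir)  # assumed to write an image named like the source file
            experiment.log_image(os.path.join(tmpdir, filename))
```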
@@ -207,22 +207,32 @@ def preprocess_and_train(config, model_type="detection"):
"""
# Get and split annotations
train_df = gather_data(config.detection_model.train_csv_folder)
-validation_df = gather_data(config.label_studio.csv_dir_validation)
-validation_df.loc[validation_df.label==0,"label"] = "Bird"
+validation = gather_data(config.label_studio.csv_dir_validation)
+validation.loc[validation.label==0,"label"] = "Bird"

+# Remove the empty frames, using hard mining instead
+train_df = train_df[~(train_df.label.astype(str)== "0")]

# Preprocess train and validation data
train_df = data_processing.preprocess_images(train_df,
root_dir=config.detection_model.train_image_dir,
-save_dir=config.detection_model.crop_image_dir)
+save_dir=config.detection_model.crop_image_dir,
+patch_size=config.predict.patch_size,
+patch_overlap=config.predict.patch_overlap)

+non_empty = train_df[train_df.xmin!=0]

+train_df.loc[train_df.label==0,"label"] = "Bird"
+validation.loc[validation.label==0,"label"] = "Bird"

-if not validation_df.empty:
-    validation_df = data_processing.preprocess_images(validation_df,
+if not validation.empty:
+    validation_df = data_processing.preprocess_images(validation,
root_dir=config.detection_model.train_image_dir,
-save_dir=config.detection_model.crop_image_dir)
-    non_empty = validation_df[validation_df.xmin!=0]
+save_dir=config.detection_model.crop_image_dir,
+patch_size=config.predict.patch_size,
+patch_overlap=config.predict.patch_overlap,
+allow_empty=True
+)
validation_df.loc[validation_df.label==0,"label"] = "Bird"

# Limit empty frames
@@ -231,6 +241,7 @@
if not validation_df.empty:
validation_df = limit_empty_frames(validation_df, config.detection_model.limit_empty_frac)


# Train model
# Load existing model
if config.detection_model.checkpoint:
else:
raise ValueError("No checkpoint or checkpoint directory found.")

+# Assert no FalsePositive label in train
+assert "FalsePositive" not in train_df.label.unique(), "FalsePositive label found in training data."

trained_model = train(train_annotations=train_df,
test_annotations=validation_df,
train_image_dir=config.detection_model.crop_image_dir,
@@ -280,7 +294,7 @@ def _predict_list_(image_paths, patch_size, patch_overlap, model_path, m=None, c

predictions = []
for image_path in image_paths:
-prediction = m.predict_tile(raster_path=image_path, return_plot=False, patch_size=patch_size, patch_overlap=patch_overlap, crop_model=crop_model)
+prediction = m.predict_tile(raster_path=image_path, return_plot=False, patch_size=patch_size, patch_overlap=patch_overlap, crop_model=crop_model, verbose=True)
if prediction is None:
prediction = pd.DataFrame({"image_path": image_path, "xmin": [None], "ymin": [None], "xmax": [None], "ymax": [None], "label": [None], "score": [None]})
predictions.append(prediction)
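limit_empty_frames is called in preprocess_and_train above, but its body sits outside this diff. A hedged sketch of what it plausibly does, assuming empty frames carry all-zero box coordinates as elsewhere in this pipeline:

```python
# Hedged sketch only; the real limit_empty_frames is not shown in this commit.
import pandas as pd

def limit_empty_frames(df: pd.DataFrame, limit_empty_frac: float) -> pd.DataFrame:
    empty = df[df.xmin == 0]       # assumption: empty frames have all-zero boxes
    non_empty = df[df.xmin != 0]
    n_keep = min(len(empty), int(len(non_empty) * limit_empty_frac))
    return pd.concat([non_empty, empty.sample(n_keep, random_state=0)])
```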
14 changes: 6 additions & 8 deletions src/pipeline.py
@@ -64,27 +64,24 @@ def run(self):
if new_val_annotations is None:
if self.config.force_upload:
print("No new annotations, but force_upload is set to True, continuing")
-self.skip_training = True
-else:
+elif not self.config.force_training:
print("No new annotations, exiting")
return None
+else:
+print(f"No new annotations, but force training is {self.config.force_training} and force upload is {self.config.force_upload}, continuing")
else:
try:
print(f"New train annotations found: {len(new_train_annotations)}")
except:
pass
print(f"New val annotations found: {len(new_val_annotations)}")

-self.skip_training = False

# Given new annotations, propogate labels to nearby images
# label_propagator = propagate.LabelPropagator(
# **self.config.propagate)
# label_propagator.through_time(new_annotations)
-else:
-self.skip_training = False

-if not self.skip_training:
+if self.config.force_training:
trained_detection_model = detection.preprocess_and_train(
self.config)

@@ -203,4 +200,5 @@ def run(self):
pipeline_monitor=pipeline_monitor)

reporter.generate_report()

else:
print("No images to annotate")
5 changes: 2 additions & 3 deletions src/pipeline_evaluation.py
@@ -186,11 +186,10 @@ def evaluate_uncertain_classification(self):
image_targets = self.classification_annotations.loc[self.classification_annotations.image_path == os.path.basename(image_path)]
image_predictions = self.uncertain_predictions.loc[self.uncertain_predictions.image_path == os.path.basename(image_path)]
image_predictions = image_predictions[image_predictions.score > self.min_score]
-if image_predictions.empty:
-    continue
target = self._format_targets(image_targets)
pred = self._format_targets(image_predictions)

+if len(pred["labels"]) == 0:
+    continue
targets.append(target)
preds.append(pred)

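The empty-frame guard now runs after _format_targets rather than before it, so a frame is skipped exactly when its formatted predictions contain no labels, keeping targets and preds appended in lockstep. A hedged sketch of the dict shape that pred["labels"] implies (the real _format_targets is not in this diff; tensors follow the torchmetrics detection convention):

```python
# Hedged sketch: the {"boxes", "labels"} dict shape implied above, not the
# project's actual _format_targets.
import pandas as pd
import torch

def format_targets(df: pd.DataFrame) -> dict:
    return {
        "boxes": torch.tensor(df[["xmin", "ymin", "xmax", "ymax"]].to_numpy(dtype="float32")),
        "labels": torch.zeros(len(df), dtype=torch.int64),  # single-class ("Bird") example
    }
```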
2 changes: 1 addition & 1 deletion submit.sh
@@ -15,4 +15,4 @@
source activate BOEM

cd ~/BOEM/
-python main.py check_annotations=False active_learning.pool_limit=100 active_testing.n_images=10 active_learning.n_images=10
+python main.py check_annotations=True active_learning.pool_limit=20000 active_testing.n_images=1 active_learning.n_images=30 detection_model.trainer.train.epochs=20
