diff --git a/USGS_backbone.py b/USGS_backbone.py
new file mode 100644
index 0000000..795328b
--- /dev/null
+++ b/USGS_backbone.py
@@ -0,0 +1,64 @@
+from deepforest import main
+import pandas as pd
+import os
+import tempfile
+import comet_ml
+from pytorch_lightning.loggers import CometLogger
+
+df = pd.read_csv("/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/20231116_cropped_annotations.csv")
+df.wat_label.value_counts()  # inspect the class distribution (no-op in a script; useful interactively)
+df = df[df.wat_label.isin(["Bird","Cartilaginous Fish","Bony Fish","Mammal","Reptile"])]
+
+# Combine Fish classes
+df.loc[df.wat_label.isin(["Cartilaginous Fish","Bony Fish"]),"wat_label"] = "Fish"
+
+# Construct padded crop name
+df["image_path"] = df["bname_parent"] + "_" + df["tile_xtl"].astype(str) + "_" + df["tile_ytl"].astype(str) + "_" + df["tile_xbr"].astype(str) + "_" + df["tile_ybr"].astype(str) + ".JPG"
+
+# Check if all images exist
+df["image_exists"] = df["image_path"].apply(lambda x: os.path.exists(os.path.join("/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/padded",x)))
+
+df["xmin"] = df["xtl"]
+df["ymin"] = df["ytl"]
+df["xmax"] = df["xbr"]
+df["ymax"] = df["ybr"]
+df["label"] = df["wat_label"]
+
+# Randomly split 85 - 15 for each class
+train = df.groupby("wat_label").sample(frac=0.85)
+test = df.drop(train.index)
+
+# Write to tmp data directory
+tmpdir = tempfile.mkdtemp()
+train.to_csv(os.path.join(tmpdir,"train.csv"),index=False)
+test.to_csv(os.path.join(tmpdir,"test.csv"),index=False)
+
+# Initialize a new DeepForest model (the model you will train) with your classes
+m = main.deepforest(config_args={"num_classes":4}, label_dict={"Bird":0,"Fish":1,"Mammal":2,"Reptile":3})
+
+# Initialize the released DeepForest model (the model whose backbone and regression head you will reuse)
+deepforest_release_model = main.deepforest()
+deepforest_release_model.load_model("weecology/deepforest-bird")
+
+# Extract the single-class backbone, which has useful features for multi-class classification
+m.model.backbone.load_state_dict(deepforest_release_model.model.backbone.state_dict())
+
+# Load the regression head into the new model
+m.model.head.regression_head.load_state_dict(deepforest_release_model.model.head.regression_head.state_dict())
+
+m.config["train"]["csv_file"] = os.path.join(tmpdir,"train.csv")
+m.config["train"]["root_dir"] = "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/padded"
+m.config["train"]["fast_dev_run"] = False
+m.config["validation"]["csv_file"] = os.path.join(tmpdir,"test.csv")
+m.config["validation"]["root_dir"] = "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/20231118/padded"
+m.config["batch_size"] = 6
+m.config["train"]["epochs"] = 25
+m.config["validation"]["val_accuracy_interval"] = 5
+m.config["train"]["scheduler"]["params"]["eps"] = 0
+comet_logger = CometLogger(project_name="BOEM", workspace="bw4sz")
+
+m.create_trainer(logger=comet_logger)
+m.trainer.fit(m)
+
+# Save the model
+m.trainer.save_checkpoint("/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/checkpoints/{}.pl".format(comet_logger.experiment.id))
diff --git a/conf/config.yaml b/conf/config.yaml
index c5f632a..3d0712a 100644
--- a/conf/config.yaml
+++ b/conf/config.yaml
@@ -26,7 +26,7 @@ predict:
   min_score: 0.4
 
 pipeline:
-  confidence_threshold: 0.5
+  confidence_threshold: 0.9
   limit_empty_frac: 0.01
 
 propagate:
@@ -34,12 +34,12 @@ propagate:
   distance_threshold_pixels: 50
 
 detection_model:
-  checkpoint: bird
+  checkpoint: "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/checkpoints/5420a9c3f27d4299992094a7b9b49cb7.pl"
   checkpoint_dir: /blue/ewhite/b.weinstein/BOEM/detection/checkpoints
   train_csv_folder: /blue/ewhite/b.weinstein/BOEM/annotations/train/
   train_image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27/annotated
   crop_image_dir: /blue/ewhite/b.weinstein/BOEM/detection/crops/
-  limit_empty_frac: 0.25
+  limit_empty_frac: 0.2
   labels:
     - "Bird"
   trainer:
@@ -49,7 +49,7 @@ detection_model:
     lr: 0.0001
     workers: 0
   validation:
-    val_accuracy_interval: 20
+    val_accuracy_interval: 3
 
 classification_model:
   checkpoint:
@@ -84,7 +84,7 @@ active_learning:
   n_images: 50
   patch_size: 2000
   patch_overlap: 0
-  min_score: 0.25
+  min_score: 0.1
  model_checkpoint:
  target_labels:
    - "Bird"
@@ -98,7 +98,7 @@ active_learning:
 active_testing:
   image_dir: /blue/ewhite/b.weinstein/BOEM/sample_flight/JPG_2024_Jan27
   strategy: 'random'
-  n_images: 1
+  n_images: 1000
   m:
   patch_size: 2000
   patch_overlap: 0
diff --git a/src/active_learning.py b/src/active_learning.py
index e0b579d..73749d1 100644
--- a/src/active_learning.py
+++ b/src/active_learning.py
@@ -70,10 +70,15 @@ def update_sys_path():
             dask_results.append(pd.concat(block_result))
         preannotations = pd.concat(dask_results)
     else:
-        preannotations = detection.predict(m=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap)
+        preannotations = detection.predict(m=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap, batch_size=32)
         preannotations = pd.concat(preannotations)
 
+    # Print the number of preannotations before removing min score
+    print("There are {} preannotations before removing min score".format(preannotations.shape[0]))
+    print("There are {} images before removing min score".format(preannotations["image_path"].nunique()))
     preannotations = preannotations[preannotations["score"] >= min_score]
+    print("There are {} preannotations after removing min score".format(preannotations.shape[0]))
+    print("There are {} images after removing min score".format(preannotations["image_path"].nunique()))
 
     if strategy == "most-detections":
         # Sort images by total number of predictions
@@ -158,6 +163,7 @@ def update_sys_path():
         preannotations = detection.predict(model=model, image_paths=pool, patch_size=patch_size, patch_overlap=patch_overlap)
         preannotations = pd.concat(preannotations)
 
+        print("There are {} preannotations before removing min score".format(preannotations.shape[0]))
        preannotations = preannotations[preannotations["score"] >= min_score]
 
        if strategy == "most-detections":
diff --git a/src/detection.py b/src/detection.py
index 7fa1578..6a7f4eb 100644
--- a/src/detection.py
+++ b/src/detection.py
@@ -59,7 +59,7 @@ def load(checkpoint, annotations = None):
 
     if not annotations is None:
         num_labels = len(annotations.label.unique())
-        if num_labels != len(snapshot.label_dict):
+        if num_labels > len(snapshot.label_dict):
             snapshot = extract_backbone(snapshot, annotations)
 
     return snapshot
@@ -135,6 +135,10 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro
     """
     tmpdir = tempfile.gettempdir()
 
+    # Fix taxonomy
+    train_annotations = fix_taxonomy(train_annotations)
+    test_annotations = fix_taxonomy(test_annotations)
+
     train_annotations.to_csv(os.path.join(tmpdir,"train.csv"), index=False)
     test_annotations.to_csv(os.path.join(tmpdir,"test.csv"), index=False)
 
@@ -142,8 +146,8 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro
     model.config["train"]["csv_file"] = os.path.join(tmpdir,"train.csv")
model.config["train"]["root_dir"] = train_image_dir - #model.config["validation"]["csv_file"] = os.path.join(tmpdir,"test.csv") - #model.config["validation"]["root_dir"] = train_image_dir + model.config["validation"]["csv_file"] = os.path.join(tmpdir,"test.csv") + model.config["validation"]["root_dir"] = train_image_dir # Loop through all keys in model.config and set them to the value of the key in model.config config_args = OmegaConf.to_container(config_args) @@ -173,13 +177,14 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro sample_train_annotations_for_image.root_dir = train_image_dir visualize.plot_annotations(sample_train_annotations_for_image, savedir=tmpdir) comet_logger.experiment.log_image(os.path.join(tmpdir, filename)) - # with comet_logger.experiment.context_manager("test_images"): - # non_empty_train_annotations = read_file(model.config["validation"]["csv_file"], root_dir=train_image_dir) - # for filename in non_empty_train_annotations.image_path.sample(5): - # sample_train_annotations_for_image = non_empty_train_annotations[non_empty_train_annotations.image_path == filename] - # sample_train_annotations_for_image.root_dir = train_image_dir - # visualize.plot_annotations(sample_train_annotations_for_image, savedir=tmpdir) - # comet_logger.experiment.log_image(os.path.join(tmpdir, filename)) + + with comet_logger.experiment.context_manager("test_images"): + non_empty_validation_annotations = read_file(model.config["validation"]["csv_file"], root_dir=train_image_dir) + for filename in non_empty_validation_annotations.image_path.head(5): + sample_validation_annotations_for_image = non_empty_validation_annotations[non_empty_validation_annotations.image_path == filename] + sample_validation_annotations_for_image.root_dir = train_image_dir + visualize.plot_annotations(sample_validation_annotations_for_image, savedir=tmpdir) + comet_logger.experiment.log_image(os.path.join(tmpdir, filename)) model.trainer.fit(model) @@ -196,6 +201,12 @@ def train(model, train_annotations, test_annotations, train_image_dir, comet_pro return model +def fix_taxonomy(df): + df["label"] = df.label.replace('Turtle', 'Reptile') + df["label"] = df.label.replace('Cetacean', 'Mammal') + + return df + def preprocess_and_train(config, model_type="detection"): """Preprocess data and train model. 
@@ -208,6 +219,8 @@ def preprocess_and_train(config, model_type="detection"):
     # Get and split annotations
     train_df = gather_data(config.detection_model.train_csv_folder)
     validation = gather_data(config.label_studio.csv_dir_validation)
+    # Map numeric label 0 back to the 'Bird' class name
+    validation.loc[validation.label==0,"label"] = "Bird"
 
     # Remove the empty frames, using hard mining instead
@@ -239,7 +251,9 @@ def preprocess_and_train(config, model_type="detection"):
     if config.detection_model.limit_empty_frac > 0:
         train_df = limit_empty_frames(train_df, config.detection_model.limit_empty_frac)
     if not validation_df.empty:
-        validation_df = limit_empty_frames(validation_df, config.detection_model.limit_empty_frac)
+        #validation_df = limit_empty_frames(validation_df, config.detection_model.limit_empty_frac)
+        # DeepForest evaluate doesn't work with empty frames yet, see https://github.com/weecology/DeepForest/pull/858
+        validation_df = validation_df[validation_df.xmin!=0]
 
     # Train model
@@ -283,7 +297,7 @@ def get_latest_checkpoint(checkpoint_dir, annotations):
 
     return m
 
-def _predict_list_(image_paths, patch_size, patch_overlap, model_path, m=None, crop_model=None):
+def _predict_list_(image_paths, patch_size, patch_overlap, model_path, m=None, crop_model=None, batch_size=64):
     if model_path:
         m = load(model_path)
     else:
@@ -291,17 +305,17 @@ def _predict_list_(image_paths, patch_size, patch_overlap, model_path, m=None, c
         raise ValueError("A model or model_path is required for prediction.")
 
     m.create_trainer(fast_dev_run=False)
-
+    m.config["batch_size"] = batch_size
     predictions = []
     for image_path in image_paths:
-        prediction = m.predict_tile(raster_path=image_path, return_plot=False, patch_size=patch_size, patch_overlap=patch_overlap, crop_model=crop_model, verbose=True)
+        prediction = m.predict_tile(raster_path=image_path, return_plot=False, patch_size=patch_size, patch_overlap=patch_overlap, crop_model=crop_model)
         if prediction is None:
             prediction = pd.DataFrame({"image_path": image_path, "xmin": [None], "ymin": [None], "xmax": [None], "ymax": [None], "label": [None], "score": [None]})
         predictions.append(prediction)
 
     return predictions
 
-def predict(image_paths, patch_size, patch_overlap, m=None, model_path=None, dask_client=None, crop_model=None):
+def predict(image_paths, patch_size, patch_overlap, m=None, model_path=None, dask_client=None, crop_model=None, batch_size=8):
     """Predict bounding boxes for images
     Args:
         m (main.deepforest): A trained deepforest model.
@@ -309,6 +323,7 @@ def predict(image_paths, patch_size, patch_overlap, m=None, model_path=None, das
         crop_model (main.deepforest): A trained deepforest model for classification.
         model_path (str): The path to a model checkpoint.
         dask_client (dask.distributed.Client): A dask client for parallel prediction.
+        batch_size (int): The batch size for prediction.
     Returns:
         list: A list of image predictions.
""" @@ -337,6 +352,6 @@ def update_sys_path(): block_result = block_result.result() predictions.append(pd.concat(block_result)) else: - predictions = _predict_list_(image_paths=image_paths, patch_size=patch_size, patch_overlap=patch_overlap, model_path=model_path, m=m, crop_model=crop_model) + predictions = _predict_list_(image_paths=image_paths, patch_size=patch_size, patch_overlap=patch_overlap, model_path=model_path, m=m, crop_model=crop_model, batch_size=batch_size) return predictions diff --git a/src/pipeline.py b/src/pipeline.py index 8ab23cb..ed4ca95 100644 --- a/src/pipeline.py +++ b/src/pipeline.py @@ -165,8 +165,8 @@ def run(self): min_score=self.config.active_learning.min_score ) - print(f"Images requiring human review: {len(confident_predictions)}") - print(f"Images auto-annotated: {len(uncertain_predictions)}") + print(f"Images requiring human review: {len(uncertain_predictions)}") + print(f"Images auto-annotated: {len(confident_predictions)}") # Intelligent cropping image_paths = uncertain_predictions["image_path"].unique() @@ -199,6 +199,7 @@ def run(self): uncertain_predictions=uncertain_predictions, pipeline_monitor=pipeline_monitor) - reporter.generate_report() + reporter.generate_report(create_video=True) else: print("No images to annotate") + diff --git a/src/pipeline_evaluation.py b/src/pipeline_evaluation.py index 1794f6d..bccdfc0 100644 --- a/src/pipeline_evaluation.py +++ b/src/pipeline_evaluation.py @@ -128,6 +128,7 @@ def predict_classification(self): image_paths=full_image_paths, patch_size=self.patch_size, patch_overlap=self.patch_overlap, + batch_size=32 ) combined_predictions = pd.concat(predictions) self.predictions.append(combined_predictions) diff --git a/src/reporting.py b/src/reporting.py index 18b8bb5..43dca99 100644 --- a/src/reporting.py +++ b/src/reporting.py @@ -50,14 +50,15 @@ def concat_predictions(self): """ self.all_predictions = pd.concat(self.pipeline_monitor.predictions, ignore_index=True) - def generate_report(self): + def generate_report(self, create_video=False): """Generate a report""" if self.pipeline_monitor: self.concat_predictions() self.write_predictions() self.write_metrics() - self.generate_video() + if create_video: + self.generate_video() def write_predictions(self): """Write predictions to a csv file""" diff --git a/submit.sh b/submit.sh index 01a4aa0..7701ca7 100644 --- a/submit.sh +++ b/submit.sh @@ -15,4 +15,4 @@ source activate BOEM cd ~/BOEM/ -python main.py check_annotations=True active_learning.pool_limit=20000 active_testing.n_images=1 active_learning.n_images=30 detection_model.trainer.epochs = 20 +python main.py check_annotations=True active_learning.pool_limit=10000 active_testing.n_images=1 active_learning.n_images=100 pipeline_evaluation.debug=False diff --git a/submit_USGS.sh b/submit_USGS.sh new file mode 100644 index 0000000..59f2a82 --- /dev/null +++ b/submit_USGS.sh @@ -0,0 +1,18 @@ +#!/bin/bash +#SBATCH --job-name=BOEM # Job name +#SBATCH --mail-type=END # Mail events +#SBATCH --mail-user=benweinstein2010@gmail.com # Where to send mail +#SBATCH --account=ewhite +#SBATCH --nodes=1 # Number of MPI ran +#SBATCH --cpus-per-task=1 +#SBATCH --mem=150GB +#SBATCH --time=48:00:00 #Time limit hrs:min:sec +#SBATCH --output=/home/b.weinstein/logs/BOEM%j.out # Standard output and error log +#SBATCH --error=/home/b.weinstein/logs/BOEM%j.err +#SBATCH --partition=gpu +#SBATCH --gpus=1 + +source activate BOEM + +cd ~/BOEM/ +python USGS_backbone.py \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py 
index 8df4acb..69344d4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -40,23 +40,22 @@ def config(tmpdir_factory):
     # Create sample bounding box annotations
     train_data = {
         'image_path': ['empty.jpg', 'birds.jpg', "birds.jpg"],
-        'xmin': [0, 200, 150],
-        'ymin': [0, 300, 250],
-        'xmax': [0, 300, 250],
-        'ymax': [0, 400, 350],
-        'label': ['Bird', 'Bird', 'Bird2'],
+        'xmin': [20, 200, 150],
+        'ymin': [10, 300, 250],
+        'xmax': [40, 300, 250],
+        'ymax': [20, 400, 350],
+        'label': ['FalsePositive', 'Bird', 'Bird2'],
         'annotator': ['test_user', 'test_user', 'test_user']
     }
 
     val_data = {
-        'image_path': ['birds_val.jpg', 'birds_val.jpg'],
-        'xmin': [150, 150],
-        'ymin': [250, 250],
-        'xmax': [250, 250],
-        'ymax': [350, 350],
-        'label': ['Bird', 'Bird2'],
-        'annotator': ['test_user', 'test_user'],
-        "score": [0.9, 0.8]
+        'image_path': ['empty.jpg', 'birds_val.jpg', 'birds_val.jpg'],
+        'xmin': [None, 150, 150],
+        'ymin': [None, 250, 250],
+        'xmax': [None, 250, 250],
+        'ymax': [None, 350, 350],
+        'label': ['Bird', 'Bird', 'Bird2'],
+        'annotator': ['test_user', 'test_user', 'test_user'],
     }
 
     metadata = {
diff --git a/tests/test_pipeline_evaluation.py b/tests/test_pipeline_evaluation.py
index a13d288..f75b9f1 100644
--- a/tests/test_pipeline_evaluation.py
+++ b/tests/test_pipeline_evaluation.py
@@ -17,12 +17,13 @@ def predict_tile(self, raster_path, patch_size=450, patch_overlap=0, return_plot
         # Return realistic predictions based on image name
         if "empty" in raster_path.lower():
             return pd.DataFrame({
-                'xmin': [],
-                'ymin': [],
-                'xmax': [],
-                'ymax': [],
-                'label': [],
-                'score': []
+                'xmin': [None],
+                'ymin': [None],
+                'xmax': [None],
+                'ymax': [None],
+                'label': [None],
+                'score': [None],
+                "image_path": [os.path.basename(raster_path)]
             })
 
         # If random, Generate 1-3 random predictions for non-empty images
@@ -34,7 +35,7 @@ def predict_tile(self, raster_path, patch_size=450, patch_overlap=0, return_plot
             'xmax': np.random.randint(800, 1000, num_predictions),
             'ymax': np.random.randint(600, 800, num_predictions),
             'label': ['Bird1'] * num_predictions,
-            'score': np.random.uniform(0.5, 0.99, num_predictions),
+            'score': np.random.uniform(0.1, 0.99, num_predictions),
             'image_path': [os.path.basename(raster_path)] * num_predictions
         })
         else:
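
Usage note: below is a minimal sketch of how the multi-class checkpoint written by USGS_backbone.py, and now referenced by detection_model.checkpoint in conf/config.yaml, could be loaded for prediction. It assumes DeepForest's standard Lightning checkpoint loading; the image path and the column selection at the end are illustrative, not part of this patch.

from deepforest import main

# Load the trained multi-class checkpoint saved by USGS_backbone.py
m = main.deepforest.load_from_checkpoint(
    "/blue/ewhite/b.weinstein/BOEM/UBFAI Annotations/checkpoints/5420a9c3f27d4299992094a7b9b49cb7.pl"
)

# Predict one image with the same patch settings used in conf/config.yaml
boxes = m.predict_tile(
    raster_path="example.jpg",  # hypothetical image path
    patch_size=2000,
    patch_overlap=0,
)
print(boxes[["xmin", "ymin", "xmax", "ymax", "label", "score"]].head())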