Merge pull request #468 from microsoft/PreRelease
PytorchWildlifev1.0.2.9
zhmiao authored Apr 12, 2024
2 parents 13622b3 + c6ef31a commit a52ca80
Showing 9 changed files with 180 additions and 14 deletions.
11 changes: 10 additions & 1 deletion PytorchWildlife/models/detection/yolov5/base_detector.py
@@ -154,7 +154,16 @@ def batch_image_detection(self, dataloader, conf_thres=0.2, id_strip=None):

    # If there are size differences in the input images, use a for loop instead of matrix processing for scaling
    for pred, size, path in zip(total_preds, total_img_sizes, total_paths):
        original_coords = pred[:, :4].copy()
        normalized_coords = []
        pred[:, :4] = scale_coords([self.IMAGE_SIZE] * 2, pred[:, :4], size).round()
        results.append(self.results_generation(pred, path, id_strip))
        res = self.results_generation(pred, path, id_strip)
        # Normalize the coordinates for timelapse compatibility
        for coord in pred[:, :4]:
            x1, y1, x2, y2 = coord
            x1, y1, x2, y2 = x1 / size[1], y1 / size[0], x2 / size[1], y2 / size[0]
            normalized_coords.append([x1, y1, x2, y2])
        res["normalized_coords"] = normalized_coords
        results.append(res)

    return results
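A minimal sketch of the coordinate normalization added above, assuming each entry of total_img_sizes is (height, width) and the boxes are pixel-space [x1, y1, x2, y2]; the helper name normalize_xyxy is hypothetical and only illustrates the division of x by image width (size[1]) and y by image height (size[0]):

import numpy as np

def normalize_xyxy(boxes_px: np.ndarray, size) -> list:
    """Scale pixel [x1, y1, x2, y2] boxes into the [0, 1] range by image width/height."""
    height, width = size  # assumption: size is (height, width), as used in the loop above
    normalized = []
    for x1, y1, x2, y2 in boxes_px:
        normalized.append([float(x1 / width), float(y1 / height),
                           float(x2 / width), float(y2 / height)])
    return normalized

# Example: one box on a 1280x720 (width x height) frame
print(normalize_xyxy(np.array([[640.0, 360.0, 960.0, 540.0]]), (720, 1280)))
# [[0.5, 0.5, 0.75, 0.75]]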
102 changes: 101 additions & 1 deletion PytorchWildlife/utils/post_process.py
Expand Up @@ -15,6 +15,8 @@
"save_crop_images",
"save_detection_json",
"save_detection_classification_json",
"save_detection_timelapse_json",
"save_detection_classification_timelapse_json",
]


@@ -119,6 +121,48 @@ def save_detection_json(results, output_dir, categories=None, exclude_category_i

json.dump(json_results, f, indent=4)

def save_detection_timelapse_json(det_results, output_dir, categories=None):
    """
    Save detection results to a Timelapse-compatible JSON file.

    Args:
        det_results (list):
            Detection results containing image ID, bounding boxes, category, and confidence.
        output_dir (str):
            Path to save the output JSON file.
        categories (list, optional):
            List of categories for detected objects. Defaults to None.
    """
    json_results = {
        "info": {"detector": "megadetector_v5"},
        "detection_categories": categories,
        "images": []
    }

    for det_r in det_results:
        image_annotations = {
            "file": det_r["img_id"],
            "max_detection_conf": max(det_r["detections"].confidence.tolist()),
            "detections": []
        }

        for i in range(len(det_r["detections"])):
            det = det_r["detections"][i]
            normalized_bbox = [float(y) for y in det_r["normalized_coords"][i]]
            detection = {
                "category": str(det.class_id[0]),
                "conf": float(det.confidence[0]),
                "bbox": [normalized_bbox[0], normalized_bbox[1], normalized_bbox[2]-normalized_bbox[0], normalized_bbox[3]-normalized_bbox[1]],
                "classifications": []
            }

            image_annotations["detections"].append(detection)

        json_results["images"].append(image_annotations)

    with open(output_dir, "w") as f:
        json.dump(json_results, f, indent=4)
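For reference, a sketch of the structure save_detection_timelapse_json writes; the field names come from the code above, while the file name, category map, and numeric values are illustrative assumptions only:

# Illustrative only -- keys mirror the function above, values are made up.
example_timelapse_output = {
    "info": {"detector": "megadetector_v5"},
    "detection_categories": {"0": "animal", "1": "person", "2": "vehicle"},  # assumed MegaDetector-style map
    "images": [
        {
            "file": "site01/IMG_0001.JPG",
            "max_detection_conf": 0.92,
            "detections": [
                {
                    "category": "0",
                    "conf": 0.92,
                    "bbox": [0.41, 0.27, 0.18, 0.22],  # normalized [x_min, y_min, width, height]
                    "classifications": []
                }
            ]
        }
    ]
}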


def save_detection_classification_json(
det_results, clf_results, output_path, det_categories=None, clf_categories=None
@@ -176,4 +220,60 @@ def save_detection_classification_json(
"clf_confidence": [float(x) for x in clf_confidence],
}
)
json.dump(json_results, f)
json.dump(json_results, f, indent=4)


def save_detection_classification_timelapse_json(
    det_results, clf_results, output_path, det_categories=None, clf_categories=None
):
    """
    Save detection and classification results to a Timelapse-compatible JSON file.

    Args:
        det_results (list):
            Detection results containing image ID, bounding boxes, detection category, and confidence.
        clf_results (list):
            Classification results containing image ID, classification category, and confidence.
        output_path (str):
            Path to save the output JSON file.
        det_categories (dict, optional):
            Dictionary of categories for detected objects. Defaults to None.
        clf_categories (dict, optional):
            Dictionary of categories for classified objects. Defaults to None.
    """
    json_results = {
        "info": {"detector": "megadetector_v5"},
        "detection_categories": det_categories,
        "classification_categories": clf_categories,
        "images": []
    }

    for det_r in det_results:
        image_annotations = {
            "file": det_r["img_id"],
            "max_detection_conf": max(det_r["detections"].confidence.tolist()),
            "detections": []
        }

        for i in range(len(det_r["detections"])):
            det = det_r["detections"][i]
            normalized_bbox = [float(y) for y in det_r["normalized_coords"][i]]
            detection = {
                "category": str(det.class_id[0]),
                "conf": float(det.confidence[0]),
                "bbox": [normalized_bbox[0], normalized_bbox[1], normalized_bbox[2]-normalized_bbox[0], normalized_bbox[3]-normalized_bbox[1]],
                "classifications": []
            }

            # Find classifications for this detection
            for clf_r in clf_results:
                if clf_r["img_id"] == det_r["img_id"]:
                    detection["classifications"].append([str(clf_r["class_id"]), float(clf_r["confidence"])])

            image_annotations["detections"].append(detection)

        json_results["images"].append(image_annotations)

    with open(output_path, "w") as f:
        json.dump(json_results, f, indent=4)
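A minimal usage sketch of the two new helpers, assuming detection_model, classification_model, det_loader, and clf_loader have already been set up as in demo/gradio_demo.py; the loader names are placeholders, the pw_utils import follows the demo scripts, and the id_strip arguments used in the demo are omitted for brevity:

import os
from PytorchWildlife import utils as pw_utils

# Detection-only Timelapse output (mirrors demo/image_demo.py)
det_results = detection_model.batch_image_detection(det_loader, conf_thres=0.2)
pw_utils.save_detection_timelapse_json(det_results,
                                       os.path.join(".", "batch_output_timelapse.json"),
                                       categories=detection_model.CLASS_NAMES)

# Detection + classification Timelapse output (mirrors demo/gradio_demo.py)
clf_results = classification_model.batch_image_classification(clf_loader)
pw_utils.save_detection_classification_timelapse_json(det_results=det_results,
                                                      clf_results=clf_results,
                                                      det_categories=detection_model.CLASS_NAMES,
                                                      clf_categories=classification_model.CLASS_NAMES,
                                                      output_path=os.path.join(".", "batch_output_timelapse.json"))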
3 changes: 2 additions & 1 deletion README.md
@@ -17,7 +17,8 @@
</div>


## ✅ Update highlights (Version 1.0.2)
## ✅ Update highlights (Version 1.0.2.9)
- [x] Added Timelapse compatibility! Check the [Gradio interface](INSTALLATION.md) or [notebooks](https://github.com/microsoft/CameraTraps/tree/main/demo).
- [x] Added Google Colab demos.
- [x] Added Snapshot Serengeti classification model into the model zoo.
- [x] Added Classification fine-tuning module.
28 changes: 20 additions & 8 deletions demo/gradio_demo.py
@@ -94,12 +94,13 @@ def single_image_detection(input_img, det_conf_thres, clf_conf_thres, img_index=
return annotated_img


def batch_detection(zip_file, det_conf_thres):
def batch_detection(zip_file, timelapse, det_conf_thres):
"""Perform detection on a batch of images from a zip file and return path to results JSON.
Args:
zip_file (File): Zip file containing images.
timelapse (bool): Whether to write a Timelapse-compatible results JSON.
det_conf_thres (float): Confidence threshold for detection.
Returns:
@@ -135,13 +136,23 @@ def batch_detection(zip_file, det_conf_thres):
        clf_loader = DataLoader(clf_dataset, batch_size=32, shuffle=False,
                                pin_memory=True, num_workers=4, drop_last=False)
        clf_results = classification_model.batch_image_classification(clf_loader, id_strip=tgt_folder_path)
        pw_utils.save_detection_classification_json(det_results=det_results,
                                                    clf_results=clf_results,
                                                    det_categories=detection_model.CLASS_NAMES,
                                                    clf_categories=classification_model.CLASS_NAMES,
                                                    output_path=json_save_path)
        if timelapse:
            pw_utils.save_detection_classification_timelapse_json(det_results=det_results,
                                                                  clf_results=clf_results,
                                                                  det_categories=detection_model.CLASS_NAMES,
                                                                  clf_categories=classification_model.CLASS_NAMES,
                                                                  output_path=json_save_path)
        else:
            pw_utils.save_detection_classification_json(det_results=det_results,
                                                        clf_results=clf_results,
                                                        det_categories=detection_model.CLASS_NAMES,
                                                        clf_categories=classification_model.CLASS_NAMES,
                                                        output_path=json_save_path)
    else:
        pw_utils.save_detection_json(det_results, json_save_path, categories=detection_model.CLASS_NAMES)
        if timelapse:
            pw_utils.save_detection_timelapse_json(det_results, json_save_path, categories=detection_model.CLASS_NAMES)
        else:
            pw_utils.save_detection_json(det_results, json_save_path, categories=detection_model.CLASS_NAMES)

    return json_save_path

@@ -199,6 +210,7 @@ def callback(frame, index):
with gr.Row():
with gr.Column():
bth_in = gr.File(label="Upload zip file.")
chck_timelapse = gr.Checkbox(label="Timelapse Output", info="Output JSON for timelapse.")
bth_conf_sl = gr.Slider(0, 1, label="Detection Confidence Threshold", value=0.2)
bth_out = gr.File(label="Detection Results JSON.", height=200)
bth_but = gr.Button("Detect Animals!")
@@ -220,7 +232,7 @@ def callback(frame, index):

load_but.click(load_models, inputs=[det_drop, clf_drop], outputs=load_out)
sgl_but.click(single_image_detection, inputs=[sgl_in, sgl_conf_sl_det, sgl_conf_sl_clf], outputs=sgl_out)
bth_but.click(batch_detection, inputs=[bth_in, bth_conf_sl], outputs=bth_out)
bth_but.click(batch_detection, inputs=[bth_in, chck_timelapse, bth_conf_sl], outputs=bth_out)
vid_but.click(video_detection, inputs=[vid_in, vid_conf_sl_det, vid_conf_sl_clf, vid_fr, vid_enc], outputs=vid_out)

if __name__ == "__main__":
6 changes: 5 additions & 1 deletion demo/image_demo.py
@@ -78,4 +78,8 @@
# Saving the detection results in JSON format
pw_utils.save_detection_json(results, os.path.join(".","batch_output.json"),
categories=detection_model.CLASS_NAMES,
exclude_category_ids=[]) # Category IDs can be found in the definition of each model.
exclude_category_ids=[])

# Saving the detection results in timelapse JSON format
pw_utils.save_detection_timelapse_json(results, os.path.join(".","batch_output_timelapse.json"),
categories=detection_model.CLASS_NAMES)
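A small sanity-check sketch for the file written above; batch_output_timelapse.json is the path used in the demo, and the printed fields follow the structure produced by save_detection_timelapse_json:

import json

with open("batch_output_timelapse.json") as f:
    timelapse_data = json.load(f)

for image in timelapse_data["images"]:
    print(image["file"], image["max_detection_conf"])
    for det in image["detections"]:
        # bbox is normalized [x_min, y_min, width, height]
        print("  ", det["category"], det["conf"], det["bbox"])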
20 changes: 20 additions & 0 deletions demo/image_detection_colabdemo.ipynb
@@ -1047,6 +1047,26 @@
" exclude_category_ids=[]) # Category IDs can be found in the definition of each model."
]
},
{
"cell_type": "markdown",
"id": "20251521",
"metadata": {},
"source": [
"### 3.1 Timelapse JSON Format:\n",
"PytorchWildlife is compatible with Timelapse, a popular tool to visualize and process camera trap data. This code will output the detection results in a JSON format compatible with Timelapse:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0689bc45",
"metadata": {},
"outputs": [],
"source": [
"pw_utils.save_detection_timelapse_json(results, os.path.join(\".\",\"batch_output_timelapse.json\"),\n",
" categories=detection_model.CLASS_NAMES)"
]
},
{
"cell_type": "markdown",
"id": "a4ee1d7b",
20 changes: 20 additions & 0 deletions demo/image_detection_demo.ipynb
@@ -181,6 +181,26 @@
" exclude_category_ids=[]) # Category IDs can be found in the definition of each model."
]
},
{
"cell_type": "markdown",
"id": "fceda190",
"metadata": {},
"source": [
"### 3.1 Timelapse JSON Format:\n",
"PytorchWildlife is compatible with Timelapse, a popular tool to visualize and process camera trap data. This code will output the detection results in a JSON format compatible with Timelapse:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2ac9b72",
"metadata": {},
"outputs": [],
"source": [
"pw_utils.save_detection_timelapse_json(results, os.path.join(\".\",\"batch_output_timelapse.json\"),\n",
" categories=detection_model.CLASS_NAMES)"
]
},
{
"cell_type": "markdown",
"id": "a4ee1d7b",
2 changes: 1 addition & 1 deletion setup.py
@@ -4,7 +4,7 @@
long_description = file.read()
setup(
name='PytorchWildlife',
version='1.0.2.8',
version='1.0.2.9.1',
packages=find_packages(),
url='https://github.com/microsoft/CameraTraps/',
license='MIT',
2 changes: 1 addition & 1 deletion version.txt
@@ -1 +1 @@
1.0.2.8
1.0.2.9
