diff --git a/PytorchWildlife/models/detection/yolov5/base_detector.py b/PytorchWildlife/models/detection/yolov5/base_detector.py
index ce85fb1ba..eb4406461 100644
--- a/PytorchWildlife/models/detection/yolov5/base_detector.py
+++ b/PytorchWildlife/models/detection/yolov5/base_detector.py
@@ -154,7 +154,16 @@ def batch_image_detection(self, dataloader, conf_thres=0.2, id_strip=None):
 
         # If there are size differences in the input images, use a for loop instead of matrix processing for scaling
         for pred, size, path in zip(total_preds, total_img_sizes, total_paths):
+            normalized_coords = []
             pred[:, :4] = scale_coords([self.IMAGE_SIZE] * 2, pred[:, :4], size).round()
-            results.append(self.results_generation(pred, path, id_strip))
+            res = self.results_generation(pred, path, id_strip)
+            # Normalize the coordinates for timelapse compatibility.
+            # x values are divided by size[1] and y values by size[0], i.e. size is treated as (height, width).
+            for coord in pred[:, :4]:
+                x1, y1, x2, y2 = coord
+                x1, y1, x2, y2 = x1 / size[1], y1 / size[0], x2 / size[1], y2 / size[0]
+                normalized_coords.append([x1, y1, x2, y2])
+            res["normalized_coords"] = normalized_coords
+            results.append(res)
 
         return results
diff --git a/PytorchWildlife/utils/post_process.py b/PytorchWildlife/utils/post_process.py
index 12f104786..ef6f028b7 100644
--- a/PytorchWildlife/utils/post_process.py
+++ b/PytorchWildlife/utils/post_process.py
@@ -15,6 +15,8 @@
     "save_crop_images",
     "save_detection_json",
     "save_detection_classification_json",
+    "save_detection_timelapse_json",
+    "save_detection_classification_timelapse_json",
 ]
 
 
@@ -119,6 +121,52 @@ def save_detection_json(results, output_dir, categories=None, exclude_category_i
         json.dump(json_results, f, indent=4)
 
 
+def save_detection_timelapse_json(det_results, output_dir, categories=None):
+    """
+    Save detection results to a Timelapse-compatible JSON file.
+
+    Each bounding box is written as [x_min, y_min, width, height], computed
+    from the normalized [x1, y1, x2, y2] values in "normalized_coords".
+
+    Args:
+        det_results (list):
+            Detection results; each item provides "img_id", "detections", and "normalized_coords".
+        output_dir (str):
+            Path of the output JSON file (a file path, despite the parameter name).
+        categories (list, optional):
+            List of categories for detected objects. Defaults to None.
+    """
+    json_results = {
+        "info": {"detector": "megadetector_v5"},
+        "detection_categories": categories,
+        "images": []
+    }
+
+    for det_r in det_results:
+        image_annotations = {
+            "file": det_r["img_id"],
+            # max() raises ValueError on an empty sequence; default=0.0 covers images with no detections.
+            "max_detection_conf": max(det_r["detections"].confidence.tolist(), default=0.0),
+            "detections": []
+        }
+
+        for i in range(len(det_r["detections"])):
+            det = det_r["detections"][i]
+            normalized_bbox = [float(y) for y in det_r["normalized_coords"][i]]
+            detection = {
+                "category": str(det.class_id[0]),
+                "conf": float(det.confidence[0]),
+                "bbox": [normalized_bbox[0], normalized_bbox[1], normalized_bbox[2]-normalized_bbox[0], normalized_bbox[3]-normalized_bbox[1]],
+                "classifications": []
+            }
+
+            image_annotations["detections"].append(detection)
+
+        json_results["images"].append(image_annotations)
+
+    with open(output_dir, "w") as f:
+        json.dump(json_results, f, indent=4)
+
 
 def save_detection_classification_json(
     det_results, clf_results, output_path, det_categories=None, clf_categories=None
@@ -176,4 +224,64 @@ def save_detection_classification_json(
                     "clf_confidence": [float(x) for x in clf_confidence],
                 }
             )
-        json.dump(json_results, f)
+        json.dump(json_results, f, indent=4)
+
+
+def save_detection_classification_timelapse_json(
+    det_results, clf_results, output_path, det_categories=None, clf_categories=None
+):
+    """
+    Save detection and classification results to a Timelapse-compatible JSON file.
+
+    Each bounding box is written as [x_min, y_min, width, height], computed
+    from the normalized [x1, y1, x2, y2] values in "normalized_coords".
+
+    Args:
+        det_results (list):
+            Detection results; each item provides "img_id", "detections", and "normalized_coords".
+        clf_results (list):
+            Classification results; each item provides "img_id", "class_id", and "confidence".
+        output_path (str):
+            Path to save the output JSON file.
+        det_categories (dict, optional):
+            Dictionary of categories for detected objects. Defaults to None.
+        clf_categories (dict, optional):
+            Dictionary of categories for classified objects. Defaults to None.
+    """
+    json_results = {
+        "info": {"detector": "megadetector_v5"},
+        "detection_categories": det_categories,
+        "classification_categories": clf_categories,
+        "images": []
+    }
+
+    for det_r in det_results:
+        image_annotations = {
+            "file": det_r["img_id"],
+            # max() raises ValueError on an empty sequence; default=0.0 covers images with no detections.
+            "max_detection_conf": max(det_r["detections"].confidence.tolist(), default=0.0),
+            "detections": []
+        }
+
+        for i in range(len(det_r["detections"])):
+            det = det_r["detections"][i]
+            normalized_bbox = [float(y) for y in det_r["normalized_coords"][i]]
+            detection = {
+                "category": str(det.class_id[0]),
+                "conf": float(det.confidence[0]),
+                "bbox": [normalized_bbox[0], normalized_bbox[1], normalized_bbox[2]-normalized_bbox[0], normalized_bbox[3]-normalized_bbox[1]],
+                "classifications": []
+            }
+
+            # NOTE(review): classifications are matched by image ID only, so every detection
+            # of an image receives all of that image's classifications — confirm this is intended.
+            for clf_r in clf_results:
+                if clf_r["img_id"] == det_r["img_id"]:
+                    detection["classifications"].append([str(clf_r["class_id"]), float(clf_r["confidence"])])
+
+            image_annotations["detections"].append(detection)
+
+        json_results["images"].append(image_annotations)
+
+    with open(output_path, "w") as f:
+        json.dump(json_results, f, indent=4)
diff --git a/README.md b/README.md
index 56086b6a2..8d1ee9b6b 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,8 @@
 
 
 
-## ✅ Update highlights (Version 1.0.2)
+## ✅ Update highlights (Version 1.0.2.9)
+- [x] Added Timelapse compatibility! Check the [Gradio interface](INSTALLATION.md) or [notebooks](https://github.com/microsoft/CameraTraps/tree/main/demo).
 - [x] Added Google Colab demos.
 - [x] Added Snapshot Serengeti classification model into the model zoo.
 - [x] Added Classification fine-tuning module.
diff --git a/demo/gradio_demo.py b/demo/gradio_demo.py index 553b2427b..59fea4977 100644 --- a/demo/gradio_demo.py +++ b/demo/gradio_demo.py @@ -94,12 +94,13 @@ def single_image_detection(input_img, det_conf_thres, clf_conf_thres, img_index= return annotated_img -def batch_detection(zip_file, det_conf_thres): +def batch_detection(zip_file, timelapse, det_conf_thres): """Perform detection on a batch of images from a zip file and return path to results JSON. Args: zip_file (File): Zip file containing images. det_conf_thre (float): Confidence threshold for detection. + timelapse (boolean): Flag to output JSON for timelapse. clf_conf_thre (float): Confidence threshold for classification. Returns: @@ -135,13 +136,23 @@ def batch_detection(zip_file, det_conf_thres): clf_loader = DataLoader(clf_dataset, batch_size=32, shuffle=False, pin_memory=True, num_workers=4, drop_last=False) clf_results = classification_model.batch_image_classification(clf_loader, id_strip=tgt_folder_path) - pw_utils.save_detection_classification_json(det_results=det_results, - clf_results=clf_results, - det_categories=detection_model.CLASS_NAMES, - clf_categories=classification_model.CLASS_NAMES, - output_path=json_save_path) + if timelapse: + pw_utils.save_detection_classification_timelapse_json(det_results=det_results, + clf_results=clf_results, + det_categories=detection_model.CLASS_NAMES, + clf_categories=classification_model.CLASS_NAMES, + output_path=json_save_path) + else: + pw_utils.save_detection_classification_json(det_results=det_results, + clf_results=clf_results, + det_categories=detection_model.CLASS_NAMES, + clf_categories=classification_model.CLASS_NAMES, + output_path=json_save_path) else: - pw_utils.save_detection_json(det_results, json_save_path, categories=detection_model.CLASS_NAMES) + if timelapse: + pw_utils.save_detection_timelapse_json(det_results, json_save_path, categories=detection_model.CLASS_NAMES) + else: + pw_utils.save_detection_json(det_results, json_save_path, 
categories=detection_model.CLASS_NAMES) return json_save_path @@ -199,6 +210,7 @@ def callback(frame, index): with gr.Row(): with gr.Column(): bth_in = gr.File(label="Upload zip file.") + chck_timelapse = gr.Checkbox(label="Timelapse Output", info="Output JSON for timelapse.") bth_conf_sl = gr.Slider(0, 1, label="Detection Confidence Threshold", value=0.2) bth_out = gr.File(label="Detection Results JSON.", height=200) bth_but = gr.Button("Detect Animals!") @@ -220,7 +232,7 @@ def callback(frame, index): load_but.click(load_models, inputs=[det_drop, clf_drop], outputs=load_out) sgl_but.click(single_image_detection, inputs=[sgl_in, sgl_conf_sl_det, sgl_conf_sl_clf], outputs=sgl_out) - bth_but.click(batch_detection, inputs=[bth_in, bth_conf_sl], outputs=bth_out) + bth_but.click(batch_detection, inputs=[bth_in, chck_timelapse, bth_conf_sl], outputs=bth_out) vid_but.click(video_detection, inputs=[vid_in, vid_conf_sl_det, vid_conf_sl_clf, vid_fr, vid_enc], outputs=vid_out) if __name__ == "__main__": diff --git a/demo/image_demo.py b/demo/image_demo.py index 96678ce30..72251f715 100644 --- a/demo/image_demo.py +++ b/demo/image_demo.py @@ -78,4 +78,8 @@ # Saving the detection results in JSON format pw_utils.save_detection_json(results, os.path.join(".","batch_output.json"), categories=detection_model.CLASS_NAMES, - exclude_category_ids=[]) # Category IDs can be found in the definition of each model. + exclude_category_ids=[]) + +# Saving the detection results in timelapse JSON format +pw_utils.save_detection_timelapse_json(results, os.path.join(".","batch_output_timelapse.json"), + categories=detection_model.CLASS_NAMES) \ No newline at end of file diff --git a/demo/image_detection_colabdemo.ipynb b/demo/image_detection_colabdemo.ipynb index 42bdc7a2b..1c2451ea9 100644 --- a/demo/image_detection_colabdemo.ipynb +++ b/demo/image_detection_colabdemo.ipynb @@ -1047,6 +1047,26 @@ " exclude_category_ids=[]) # Category IDs can be found in the definition of each model." 
] }, + { + "cell_type": "markdown", + "id": "20251521", + "metadata": {}, + "source": [ + "### 3.1 Timelapse JSON Format:\n", + "PytorchWildlife is compatible with Timelapse, a popular tool to visualize and process camera trap data. This code will output the detection results in a JSON format compatible with Timelapse:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0689bc45", + "metadata": {}, + "outputs": [], + "source": [ + "pw_utils.save_detection_timelapse_json(results, os.path.join(\".\",\"batch_output_timelapse.json\"),\n", + " categories=detection_model.CLASS_NAMES)" + ] + }, { "cell_type": "markdown", "id": "a4ee1d7b", diff --git a/demo/image_detection_demo.ipynb b/demo/image_detection_demo.ipynb index 10c424fc7..31457754c 100644 --- a/demo/image_detection_demo.ipynb +++ b/demo/image_detection_demo.ipynb @@ -181,6 +181,26 @@ " exclude_category_ids=[]) # Category IDs can be found in the definition of each model." ] }, + { + "cell_type": "markdown", + "id": "fceda190", + "metadata": {}, + "source": [ + "### 3.1 Timelapse JSON Format:\n", + "PytorchWildlife is compatible with Timelapse, a popular tool to visualize and process camera trap data. 
This code will output the detection results in a JSON format compatible with Timelapse:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2ac9b72", + "metadata": {}, + "outputs": [], + "source": [ + "pw_utils.save_detection_timelapse_json(results, os.path.join(\".\",\"batch_output_timelapse.json\"),\n", + " categories=detection_model.CLASS_NAMES)" + ] + }, { "cell_type": "markdown", "id": "a4ee1d7b", diff --git a/setup.py b/setup.py index 2735d55d3..ae58b7698 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ long_description = file.read() setup( name='PytorchWildlife', - version='1.0.2.8', + version='1.0.2.9.1', packages=find_packages(), url='https://github.com/microsoft/CameraTraps/', license='MIT', diff --git a/version.txt b/version.txt index 97cb20662..c2a950f18 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.0.2.8 \ No newline at end of file +1.0.2.9 \ No newline at end of file