From c375b93315565a9f2a7e22d959b359b339761b89 Mon Sep 17 00:00:00 2001
From: Jakub <jakub.pingielski@deepsense.ai>
Date: Mon, 9 May 2022 11:54:08 +0200
Subject: [PATCH 01/30] Added support for config

---
 tools/train.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/train.py b/tools/train.py
index b69066fa6..51a6baca4 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -41,6 +41,12 @@ def make_parser():
         type=str,
         help="plz input your experiment description file",
     )
+    parser.add_argument(
+        "--config_filepath",
+        default=None,
+        type=str,
+        help="Filepath to config file",
+    )
     parser.add_argument(
         "--resume", default=False, action="store_true", help="resume training"
     )

From c03d5bab9333771c6ba798530a2b66fdec69c442 Mon Sep 17 00:00:00 2001
From: Jakub <jakub.pingielski@deepsense.ai>
Date: Mon, 9 May 2022 12:26:05 +0200
Subject: [PATCH 02/30] Added neptune integration

---
 tools/train.py        |  6 ++++++
 yolox/core/trainer.py |  2 +-
 yolox/exp/base_exp.py | 14 +++++++++++++-
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/tools/train.py b/tools/train.py
index 51a6baca4..9195fb15e 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -7,6 +7,7 @@
 import warnings
 from loguru import logger
 
+import yaml
 import torch
 import torch.backends.cudnn as cudnn
 
@@ -131,6 +132,11 @@ def main(exp, args):
     if not args.experiment_name:
         args.experiment_name = exp.exp_name
 
+    if args.config_filepath is not None:
+        with open(args.config_filepath, "r") as f:
+            config = yaml.safe_load(f)
+        exp.add_params_from_config(config, use_neptune=True)
+
     num_gpu = get_num_devices() if args.devices is None else args.devices
     assert num_gpu <= get_num_devices()
 
diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index a9ee2a681..b0b0afeb3 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -38,7 +38,7 @@ def __init__(self, exp, args):
         # before_train methods.
         self.exp = exp
         self.args = args
-
+        self.neptune = self.exp.neptune
         # training related attr
         self.max_epoch = exp.max_epoch
         self.amp_training = args.fp16
diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py
index e26ae079c..127c5de07 100644
--- a/yolox/exp/base_exp.py
+++ b/yolox/exp/base_exp.py
@@ -8,6 +8,7 @@
 from typing import Dict
 from tabulate import tabulate
 
+import neptune.new as neptune
 import torch
 from torch.nn import Module
 
@@ -22,7 +23,10 @@ def __init__(self):
         self.output_dir = "./YOLOX_outputs"
         self.print_interval = 100
         self.eval_interval = 10
-
+        self.neptune = neptune.init(
+            project="jakub.pingielski/b-yond",
+            api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI2NTlkYzZmZC1kZTY5LTQ2NjMtODFkZC04YmY4NTNmYTkwMTIifQ==",
+        )
     @abstractmethod
     def get_model(self) -> Module:
         pass
@@ -73,3 +77,11 @@ def merge(self, cfg_list):
                     except Exception:
                         v = ast.literal_eval(v)
                 setattr(self, k, v)
+
+    def add_params_from_config(self, config: dict, use_neptune: bool = True):
+        for key, value in config.items():
+            setattr(self, key, value)
+            if use_neptune and self.neptune:
+                self.neptune[f"config/{key}"].log(value)
+
+

From c56359c040db0de33ce0ad4ec9d8f0f114c6b257 Mon Sep 17 00:00:00 2001
From: Jakub <jakub.pingielski@deepsense.ai>
Date: Tue, 10 May 2022 11:30:41 +0200
Subject: [PATCH 03/30] Log config file and best checkpoint as Neptune artifacts

---
 tools/train.py            | 2 +-
 yolox/core/trainer.py     | 2 ++
 yolox/exp/yolox_base.py   | 2 +-
 yolox/utils/checkpoint.py | 7 ++++++-
 4 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/tools/train.py b/tools/train.py
index 9195fb15e..abdb55f08 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -136,7 +136,7 @@ def main(exp, args):
         with open(args.config_filepath, "r") as f:
             config = yaml.safe_load(f)
         exp.add_params_from_config(config, use_neptune=True)
-
+        exp.neptune.log_artifact(args.config_filepath)
     num_gpu = get_num_devices() if args.devices is None else args.devices
     assert num_gpu <= get_num_devices()
 
diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index b0b0afeb3..1b79c53fb 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -364,3 +364,5 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False):
 
             if self.args.logger == "wandb":
                 self.wandb_logger.save_checkpoint(self.file_name, ckpt_name, update_best_ckpt)
+            if self.neptune:
+                self.neptune.log_artefact()
diff --git a/yolox/exp/yolox_base.py b/yolox/exp/yolox_base.py
index 611b25825..5d0496f49 100644
--- a/yolox/exp/yolox_base.py
+++ b/yolox/exp/yolox_base.py
@@ -94,7 +94,7 @@ def __init__(self):
         self.eval_interval = 10
         # save history checkpoint or not.
         # If set to False, yolox will only save latest and best ckpt.
-        self.save_history_ckpt = True
+        self.save_history_ckpt = False
         # name of experiment
         self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
 
diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py
index a0c200e41..2d6fa4226 100644
--- a/yolox/utils/checkpoint.py
+++ b/yolox/utils/checkpoint.py
@@ -4,6 +4,7 @@
 import os
 import shutil
 from loguru import logger
+import neptune.new as neptune
 
 import torch
 
@@ -33,7 +34,7 @@ def load_ckpt(model, ckpt):
     return model
 
 
-def save_checkpoint(state, is_best, save_dir, model_name=""):
+def save_checkpoint(state, is_best, save_dir, model_name, neptune):
     if not os.path.exists(save_dir):
         os.makedirs(save_dir)
     filename = os.path.join(save_dir, model_name + "_ckpt.pth")
@@ -41,3 +42,7 @@ def save_checkpoint(state, is_best, save_dir, model_name=""):
     if is_best:
         best_filename = os.path.join(save_dir, "best_ckpt.pth")
         shutil.copyfile(filename, best_filename)
+        if neptune:
+            neptune.log_artifact(best_filename)
+
+

From a0f589c505fa8e1164e42b442ecf5e1a35ecfe9a Mon Sep 17 00:00:00 2001
From: Jakub <jakub.pingielski@deepsense.ai>
Date: Tue, 10 May 2022 11:43:44 +0200
Subject: [PATCH 04/30] Track config and best checkpoint files in Neptune

---
 tools/train.py            | 1 +
 yolox/core/trainer.py     | 4 ++--
 yolox/utils/checkpoint.py | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/train.py b/tools/train.py
index abdb55f08..9a833d03a 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -137,6 +137,7 @@ def main(exp, args):
             config = yaml.safe_load(f)
         exp.add_params_from_config(config, use_neptune=True)
         exp.neptune.log_artifact(args.config_filepath)
+        exp.neptune['config'].track_files(args.config_filepath)
     num_gpu = get_num_devices() if args.devices is None else args.devices
     assert num_gpu <= get_num_devices()
 
diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index 1b79c53fb..3416a85fe 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -360,9 +360,9 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False):
                 update_best_ckpt,
                 self.file_name,
                 ckpt_name,
+                self.neptune,
             )
 
             if self.args.logger == "wandb":
                 self.wandb_logger.save_checkpoint(self.file_name, ckpt_name, update_best_ckpt)
-            if self.neptune:
-                self.neptune.log_artefact()
+
diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py
index 2d6fa4226..74995d038 100644
--- a/yolox/utils/checkpoint.py
+++ b/yolox/utils/checkpoint.py
@@ -44,5 +44,7 @@ def save_checkpoint(state, is_best, save_dir, model_name, neptune):
         shutil.copyfile(filename, best_filename)
         if neptune:
             neptune.log_artifact(best_filename)
+            neptune['best_checkpoint'].track_files(best_filename)
+
 
 

From dbc43f89a80a200e7b1377b7dc58c78115efa39c Mon Sep 17 00:00:00 2001
From: Jakub <jakub.pingielski@deepsense.ai>
Date: Tue, 10 May 2022 11:53:13 +0200
Subject: [PATCH 05/30] Replace log_artifact with track_files, add debug prints

---
 tools/train.py            | 2 +-
 yolox/utils/checkpoint.py | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tools/train.py b/tools/train.py
index 9a833d03a..46eeff52f 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -136,7 +136,7 @@ def main(exp, args):
         with open(args.config_filepath, "r") as f:
             config = yaml.safe_load(f)
         exp.add_params_from_config(config, use_neptune=True)
-        exp.neptune.log_artifact(args.config_filepath)
+        print("saving config from", args.config_filepath)
         exp.neptune['config'].track_files(args.config_filepath)
     num_gpu = get_num_devices() if args.devices is None else args.devices
     assert num_gpu <= get_num_devices()
diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py
index 74995d038..5595844bf 100644
--- a/yolox/utils/checkpoint.py
+++ b/yolox/utils/checkpoint.py
@@ -42,9 +42,8 @@ def save_checkpoint(state, is_best, save_dir, model_name, neptune):
     if is_best:
         best_filename = os.path.join(save_dir, "best_ckpt.pth")
         shutil.copyfile(filename, best_filename)
-        if neptune:
-            neptune.log_artifact(best_filename)
-            neptune['best_checkpoint'].track_files(best_filename)
+        print("saving best checkpoint to ", best_filename)
+        neptune['best_checkpoint'].track_files(best_filename)
 
 
 

From 3fe9cd281160e99d388cc2faa6e926e28680a73c Mon Sep 17 00:00:00 2001
From: Jakub <jakub.pingielski@deepsense.ai>
Date: Tue, 10 May 2022 13:43:22 +0200
Subject: [PATCH 06/30] Remove debug print statements

---
 tools/train.py            | 1 -
 yolox/utils/checkpoint.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/tools/train.py b/tools/train.py
index 46eeff52f..a7d40028d 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -136,7 +136,6 @@ def main(exp, args):
         with open(args.config_filepath, "r") as f:
             config = yaml.safe_load(f)
         exp.add_params_from_config(config, use_neptune=True)
-        print("saving config from", args.config_filepath)
         exp.neptune['config'].track_files(args.config_filepath)
     num_gpu = get_num_devices() if args.devices is None else args.devices
     assert num_gpu <= get_num_devices()
diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py
index 5595844bf..00ae8c5f3 100644
--- a/yolox/utils/checkpoint.py
+++ b/yolox/utils/checkpoint.py
@@ -42,7 +42,6 @@ def save_checkpoint(state, is_best, save_dir, model_name, neptune):
     if is_best:
         best_filename = os.path.join(save_dir, "best_ckpt.pth")
         shutil.copyfile(filename, best_filename)
-        print("saving best checkpoint to ", best_filename)
         neptune['best_checkpoint'].track_files(best_filename)
 
 

From 586d3ede0c2de483bbd9838a2ef45080272187bb Mon Sep 17 00:00:00 2001
From: Jakub <jakub.pingielski@deepsense.ai>
Date: Tue, 10 May 2022 14:06:50 +0200
Subject: [PATCH 07/30] Make neptune argument of save_checkpoint optional

---
 yolox/utils/checkpoint.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py
index 00ae8c5f3..50da303a6 100644
--- a/yolox/utils/checkpoint.py
+++ b/yolox/utils/checkpoint.py
@@ -34,7 +34,7 @@ def load_ckpt(model, ckpt):
     return model
 
 
-def save_checkpoint(state, is_best, save_dir, model_name, neptune):
+def save_checkpoint(state, is_best, save_dir, model_name="", neptune=None):
     if not os.path.exists(save_dir):
         os.makedirs(save_dir)
     filename = os.path.join(save_dir, model_name + "_ckpt.pth")
@@ -42,7 +42,5 @@ def save_checkpoint(state, is_best, save_dir, model_name, neptune):
     if is_best:
         best_filename = os.path.join(save_dir, "best_ckpt.pth")
         shutil.copyfile(filename, best_filename)
-        neptune['best_checkpoint'].track_files(best_filename)
-
-
-
+        if neptune:
+            neptune['best_checkpoint'].track_files(best_filename)

From da1f121df589092323de3d73f24bf229d25291ac Mon Sep 17 00:00:00 2001
From: Jakub <jakub.pingielski@deepsense.ai>
Date: Thu, 12 May 2022 11:52:24 +0200
Subject: [PATCH 08/30] add more metrics

---
 tools/train.py        | 2 +-
 yolox/core/trainer.py | 6 ++++--
 yolox/exp/base_exp.py | 8 ++++++--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/tools/train.py b/tools/train.py
index a7d40028d..fb2b35717 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -136,7 +136,7 @@ def main(exp, args):
         with open(args.config_filepath, "r") as f:
             config = yaml.safe_load(f)
         exp.add_params_from_config(config, use_neptune=True)
-        exp.neptune['config'].track_files(args.config_filepath)
+        exp.neptune['config_file'].track_files(args.config_filepath)
     num_gpu = get_num_devices() if args.devices is None else args.devices
     assert num_gpu <= get_num_devices()
 
diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index 3416a85fe..6048101ee 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -114,6 +114,7 @@ def train_one_iter(self):
             self.ema_model.update(self.model)
 
         lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1)
+        self.neptune['lr'].log(lr)
         for param_group in self.optimizer.param_groups:
             param_group["lr"] = lr
 
@@ -243,7 +244,8 @@ def after_iter(self):
             loss_str = ", ".join(
                 ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()]
             )
-
+            for loss_name, loss_value in loss_meter.items():
+                self.neptune[loss_name].log(loss_value.latest())
             time_meter = self.meter.get_filtered_meter("time")
             time_str = ", ".join(
                 ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]
@@ -327,7 +329,7 @@ def evaluate_and_save_model(self):
 
         update_best_ckpt = ap50_95 > self.best_ap
         self.best_ap = max(self.best_ap, ap50_95)
-
+        self.neptune['best_ap'].log(self.best_ap)
         if self.rank == 0:
             if self.args.logger == "tensorboard":
                 self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py
index 127c5de07..e76400b17 100644
--- a/yolox/exp/base_exp.py
+++ b/yolox/exp/base_exp.py
@@ -13,7 +13,7 @@
 from torch.nn import Module
 
 from yolox.utils import LRScheduler
-
+from paths import DATASETS_PATH
 
 class BaseExp(metaclass=ABCMeta):
     """Basic class for any experiment."""
@@ -80,7 +80,11 @@ def merge(self, cfg_list):
 
     def add_params_from_config(self, config: dict, use_neptune: bool = True):
         for key, value in config.items():
-            setattr(self, key, value)
+            if key == "dataset_version":
+                value = DATASETS_PATH / key
+                setattr("dataset_dir", value)
+            else:
+                setattr(self, key, value)
             if use_neptune and self.neptune:
                 self.neptune[f"config/{key}"].log(value)
 

From 9415f617129caf6863a03d1ebd77e679a24c112d Mon Sep 17 00:00:00 2001
From: Jakub <jakub.pingielski@deepsense.ai>
Date: Thu, 12 May 2022 12:20:42 +0200
Subject: [PATCH 09/30] Fix Neptune log keys and dataset_dir assignment

---
 yolox/core/trainer.py | 6 +++---
 yolox/exp/base_exp.py | 3 +--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index 6048101ee..94b18024e 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -114,7 +114,7 @@ def train_one_iter(self):
             self.ema_model.update(self.model)
 
         lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1)
-        self.neptune['lr'].log(lr)
+        self.neptune['config/lr'].log(lr)
         for param_group in self.optimizer.param_groups:
             param_group["lr"] = lr
 
@@ -245,7 +245,7 @@ def after_iter(self):
                 ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()]
             )
             for loss_name, loss_value in loss_meter.items():
-                self.neptune[loss_name].log(loss_value.latest())
+                self.neptune[f"loss/{loss_name}"].log(loss_value.latest)
             time_meter = self.meter.get_filtered_meter("time")
             time_str = ", ".join(
                 ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]
@@ -329,7 +329,7 @@ def evaluate_and_save_model(self):
 
         update_best_ckpt = ap50_95 > self.best_ap
         self.best_ap = max(self.best_ap, ap50_95)
-        self.neptune['best_ap'].log(self.best_ap)
+        self.neptune['metrics/best_ap'].log(self.best_ap)
         if self.rank == 0:
             if self.args.logger == "tensorboard":
                 self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py
index e76400b17..479cd3414 100644
--- a/yolox/exp/base_exp.py
+++ b/yolox/exp/base_exp.py
@@ -81,8 +81,7 @@ def merge(self, cfg_list):
     def add_params_from_config(self, config: dict, use_neptune: bool = True):
         for key, value in config.items():
             if key == "dataset_version":
-                value = DATASETS_PATH / key
-                setattr("dataset_dir", value)
+                setattr(self, "dataset_dir", DATASETS_PATH / value)
             else:
                 setattr(self, key, value)
             if use_neptune and self.neptune:

From 9e7c5b84179715bcc838c5fc530d115cc5957077 Mon Sep 17 00:00:00 2001
From: Dawid Stachowiak <dawid.stachowiak@pascal01.intra.deepsense.ai>
Date: Thu, 19 May 2022 16:01:14 +0200
Subject: [PATCH 10/30] fixes in training and neptune logging

---
 tools/train.py            |  7 ++++++-
 yolox/exp/base_exp.py     | 17 +++++++++++++----
 yolox/utils/checkpoint.py |  2 +-
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/tools/train.py b/tools/train.py
index fb2b35717..cfbf19954 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -127,6 +127,11 @@ def main(exp, args):
 if __name__ == "__main__":
     args = make_parser().parse_args()
     exp = get_exp(args.exp_file, args.name)
+
+    #TODO: Add neptune logging with multidevice training. Logging now works only 
+    # on 1 gpu device training, not working with multiprocessing.
+    exp.set_neptune_logging(True)
+
     exp.merge(args.opts)
 
     if not args.experiment_name:
@@ -136,7 +141,7 @@ def main(exp, args):
         with open(args.config_filepath, "r") as f:
             config = yaml.safe_load(f)
         exp.add_params_from_config(config, use_neptune=True)
-        exp.neptune['config_file'].track_files(args.config_filepath)
+        exp.neptune['config_file'].upload(args.config_filepath)
     num_gpu = get_num_devices() if args.devices is None else args.devices
     assert num_gpu <= get_num_devices()
 
diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py
index 479cd3414..1aabd0d51 100644
--- a/yolox/exp/base_exp.py
+++ b/yolox/exp/base_exp.py
@@ -23,10 +23,8 @@ def __init__(self):
         self.output_dir = "./YOLOX_outputs"
         self.print_interval = 100
         self.eval_interval = 10
-        self.neptune = neptune.init(
-            project="jakub.pingielski/b-yond",
-            api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI2NTlkYzZmZC1kZTY5LTQ2NjMtODFkZC04YmY4NTNmYTkwMTIifQ==",
-        )
+        self.neptune = None
+
     @abstractmethod
     def get_model(self) -> Module:
         pass
@@ -64,6 +62,17 @@ def __repr__(self):
         ]
         return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid")
 
+    def set_neptune_logging(self, state):
+        if state:
+            self.neptune = neptune.init(
+            project="jakub.pingielski/b-yond",
+            api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI2NTlkYzZmZC1kZTY5LTQ2NjMtODFkZC04YmY4NTNmYTkwMTIifQ==",
+        )
+        else:
+            if self.neptune is not None:
+                self.neptune.stop()
+            self.neptune = None
+
     def merge(self, cfg_list):
         assert len(cfg_list) % 2 == 0
         for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py
index 50da303a6..e7d732bef 100644
--- a/yolox/utils/checkpoint.py
+++ b/yolox/utils/checkpoint.py
@@ -43,4 +43,4 @@ def save_checkpoint(state, is_best, save_dir, model_name="", neptune=None):
         best_filename = os.path.join(save_dir, "best_ckpt.pth")
         shutil.copyfile(filename, best_filename)
         if neptune:
-            neptune['best_checkpoint'].track_files(best_filename)
+            neptune['best_checkpoint'].upload(best_filename)

From d26a6ca5600528e1eaad05781e6789cee56d4c81 Mon Sep 17 00:00:00 2001
From: Aditya-Bobade <aditya.bobade@b-yond.com>
Date: Thu, 19 May 2022 21:38:01 +0530
Subject: [PATCH 11/30] validation loss logging

---
 yolox/core/trainer.py   | 38 ++++++++++++++++++++++++++++
 yolox/exp/yolox_base.py | 56 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)

diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index 94b18024e..f01c85aac 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -152,10 +152,18 @@ def before_train(self):
             no_aug=self.no_aug,
             cache_img=self.args.cache,
         )
+        self.val_loader = self.exp.get_val_loader(
+            batch_size=self.args.batch_size,
+            is_distributed=self.is_distributed,
+            no_aug=False,
+            cache_img=self.args.cache,
+        )
         logger.info("init prefetcher, this might take one minute or less...")
         self.prefetcher = DataPrefetcher(self.train_loader)
+        self.val_prefetcher = DataPrefetcher(self.val_loader)
         # max_iter means iters per epoch
         self.max_iter = len(self.train_loader)
+        self.max_val_iter = len(self.val_loader)
 
         self.lr_scheduler = self.exp.get_lr_scheduler(
             self.exp.basic_lr_per_img * self.args.batch_size, self.max_iter
@@ -315,6 +323,9 @@ def resume_train(self, model):
         return model
 
     def evaluate_and_save_model(self):
+        # calculate loss
+        self.calculate_eval_loss()
+
         if self.use_model_ema:
             evalmodel = self.ema_model.ema
         else:
@@ -368,3 +379,30 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False):
             if self.args.logger == "wandb":
                 self.wandb_logger.save_checkpoint(self.file_name, ckpt_name, update_best_ckpt)
 
+    def calculate_eval_loss(self):
+        for iter in range(self.max_val_iter):
+            inps, targets = self.val_prefetcher.next()
+            inps = inps.to(self.data_type)
+            targets = targets.to(self.data_type)
+            targets.requires_grad = False
+            inps, targets = self.exp.preprocess(inps, targets, self.input_size)
+
+            with torch.cuda.amp.autocast(enabled=self.amp_training):
+                outputs = self.model(inps, targets)
+
+            loss = {
+                "total_loss": outputs["total_loss"],
+                "iou_loss": outputs["iou_loss"],
+                "l1_loss": outputs["l1_loss"],
+                "conf_loss": outputs["conf_loss"],
+                "cls_loss": outputs["cls_loss"]
+            }
+            progress_str = "epoch: {}/{}, iter: {}/{},".format(
+                self.epoch + 1, self.max_epoch, iter + 1, self.max_val_iter
+            )
+
+            for loss_name, loss_value in loss.items():
+                progress_str += " {}: {:.1f}".format(loss_name, loss_value)
+                self.neptune[f"loss/val/{loss_name}"].log(loss_value)
+
+            logger.info("Validation:{}".format(progress_str))
diff --git a/yolox/exp/yolox_base.py b/yolox/exp/yolox_base.py
index 5d0496f49..097a9c5e3 100644
--- a/yolox/exp/yolox_base.py
+++ b/yolox/exp/yolox_base.py
@@ -201,6 +201,62 @@ def get_data_loader(
 
         return train_loader
 
+    def get_val_loader(
+        self, batch_size, is_distributed, no_aug=False, cache_img=False, testdev=False
+    ):
+        from yolox.data import (
+            COCODataset,
+            TrainTransform,
+            YoloBatchSampler,
+            DataLoader,
+            InfiniteSampler,
+            MosaicDetection,
+            worker_init_reset_seed,
+        )
+        from yolox.utils import (
+            wait_for_the_master,
+            get_local_rank,
+        )
+
+        local_rank = get_local_rank()
+
+        with wait_for_the_master(local_rank):
+            dataset = COCODataset(
+                data_dir=self.data_dir,
+                json_file=self.val_ann if not testdev else self.test_ann,
+                img_size=self.input_size,
+                preproc=TrainTransform(
+                    max_labels=50,
+                    flip_prob=0.0,
+                    hsv_prob=0.0),
+                cache=cache_img,
+            )
+
+        self.dataset = dataset
+
+        if is_distributed:
+            batch_size = batch_size // dist.get_world_size()
+
+        sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0)
+
+        batch_sampler = YoloBatchSampler(
+            sampler=sampler,
+            batch_size=batch_size,
+            drop_last=False,
+            mosaic=not no_aug,
+        )
+
+        dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True}
+        dataloader_kwargs["batch_sampler"] = batch_sampler
+
+        # Make sure each process has different random seed, especially for 'fork' method.
+        # Check https://github.com/pytorch/pytorch/issues/63311 for more details.
+        dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed
+
+        val_loader = DataLoader(self.dataset, **dataloader_kwargs)
+
+        return val_loader
+
     def random_resize(self, data_loader, epoch, rank, is_distributed):
         tensor = torch.LongTensor(2).cuda()
 

From e53d2bc5343490fc4d1e76a3421972fe10ee1c44 Mon Sep 17 00:00:00 2001
From: Aditya-Bobade <aditya.bobade@b-yond.com>
Date: Fri, 20 May 2022 14:28:16 +0530
Subject: [PATCH 12/30] flag for validation loss logging

---
 yolox/core/trainer.py   | 25 ++++++++++++++++---------
 yolox/exp/yolox_base.py |  2 ++
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index f01c85aac..3dedda515 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -55,6 +55,9 @@ def __init__(self, exp, args):
         self.input_size = exp.input_size
         self.best_ap = 0
 
+        # validation loss
+        self.calc_validation_loss = exp.calc_val_loss
+
         # metric record
         self.meter = MeterBuffer(window_size=exp.print_interval)
         self.file_name = os.path.join(exp.output_dir, args.experiment_name)
@@ -152,18 +155,21 @@ def before_train(self):
             no_aug=self.no_aug,
             cache_img=self.args.cache,
         )
-        self.val_loader = self.exp.get_val_loader(
-            batch_size=self.args.batch_size,
-            is_distributed=self.is_distributed,
-            no_aug=False,
-            cache_img=self.args.cache,
-        )
+        if self.calc_validation_loss:
+            self.val_loader = self.exp.get_val_loader(
+                batch_size=self.args.batch_size,
+                is_distributed=self.is_distributed,
+                no_aug=False,
+                cache_img=self.args.cache,
+            )
         logger.info("init prefetcher, this might take one minute or less...")
         self.prefetcher = DataPrefetcher(self.train_loader)
-        self.val_prefetcher = DataPrefetcher(self.val_loader)
+        if self.calc_validation_loss:
+            self.val_prefetcher = DataPrefetcher(self.val_loader)
         # max_iter means iters per epoch
         self.max_iter = len(self.train_loader)
-        self.max_val_iter = len(self.val_loader)
+        if self.calc_validation_loss:
+            self.max_val_iter = len(self.val_loader)
 
         self.lr_scheduler = self.exp.get_lr_scheduler(
             self.exp.basic_lr_per_img * self.args.batch_size, self.max_iter
@@ -324,7 +330,8 @@ def resume_train(self, model):
 
     def evaluate_and_save_model(self):
         # calculate loss
-        self.calculate_eval_loss()
+        if self.calc_validation_loss:
+            self.calculate_eval_loss()
 
         if self.use_model_ema:
             evalmodel = self.ema_model.ema
diff --git a/yolox/exp/yolox_base.py b/yolox/exp/yolox_base.py
index 097a9c5e3..51d9337c4 100644
--- a/yolox/exp/yolox_base.py
+++ b/yolox/exp/yolox_base.py
@@ -81,6 +81,8 @@ def __init__(self):
         self.no_aug_epochs = 15
         # apply EMA during training
         self.ema = True
+        # calculate validation loss
+        self.calc_val_loss = False
 
         # weight decay of optimizer
         self.weight_decay = 5e-4

From da181cfbfdd58ef7398d8f1d63e7df4280b649e8 Mon Sep 17 00:00:00 2001
From: Aditya-Bobade <aditya.bobade@b-yond.com>
Date: Fri, 20 May 2022 17:00:17 +0530
Subject: [PATCH 13/30] average validation loss logging

---
 yolox/core/trainer.py | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index 3dedda515..4d400e66f 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -5,6 +5,7 @@
 import datetime
 import os
 import time
+import numpy as np
 from loguru import logger
 
 import torch
@@ -387,6 +388,13 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False):
                 self.wandb_logger.save_checkpoint(self.file_name, ckpt_name, update_best_ckpt)
 
     def calculate_eval_loss(self):
+        loss = {
+            "total_loss": [],
+            "iou_loss": [],
+            "l1_loss": [],
+            "conf_loss": [],
+            "cls_loss": []
+        }
         for iter in range(self.max_val_iter):
             inps, targets = self.val_prefetcher.next()
             inps = inps.to(self.data_type)
@@ -397,19 +405,18 @@ def calculate_eval_loss(self):
             with torch.cuda.amp.autocast(enabled=self.amp_training):
                 outputs = self.model(inps, targets)
 
-            loss = {
-                "total_loss": outputs["total_loss"],
-                "iou_loss": outputs["iou_loss"],
-                "l1_loss": outputs["l1_loss"],
-                "conf_loss": outputs["conf_loss"],
-                "cls_loss": outputs["cls_loss"]
-            }
-            progress_str = "epoch: {}/{}, iter: {}/{},".format(
-                self.epoch + 1, self.max_epoch, iter + 1, self.max_val_iter
-            )
+            loss["total_loss"].append(outputs["total_loss"])
+            loss["iou_loss"].append(outputs["iou_loss"])
+            loss["l1_loss"].append(outputs["l1_loss"])
+            loss["conf_loss"].append(outputs["conf_loss"])
+            loss["cls_loss"].append(outputs["cls_loss"])
+
+        progress_str = "epoch: {}/{},".format(
+            self.epoch + 1, self.max_epoch
+        )
 
-            for loss_name, loss_value in loss.items():
-                progress_str += " {}: {:.1f}".format(loss_name, loss_value)
-                self.neptune[f"loss/val/{loss_name}"].log(loss_value)
+        for loss_name, loss_value in loss.items():
+            progress_str += " {}: {:.1f}".format(loss_name, np.nanmean(loss_value))
+            self.neptune[f"loss/val/{loss_name}"].log(np.nanmean(loss_value))
 
-            logger.info("Validation:{}".format(progress_str))
+        logger.info("Validation:{}".format(progress_str))

From 47eccabd586bfea33d893a50e07a24fb08bcc38e Mon Sep 17 00:00:00 2001
From: Aditya-Bobade <aditya.bobade@b-yond.com>
Date: Fri, 20 May 2022 19:58:48 +0530
Subject: [PATCH 14/30] remove average validation loss logging

---
 yolox/core/trainer.py | 39 ++++++++++++++-------------------------
 1 file changed, 14 insertions(+), 25 deletions(-)

diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index 4d400e66f..215e47c5d 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -5,7 +5,6 @@
 import datetime
 import os
 import time
-import numpy as np
 from loguru import logger
 
 import torch
@@ -388,35 +387,25 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False):
                 self.wandb_logger.save_checkpoint(self.file_name, ckpt_name, update_best_ckpt)
 
     def calculate_eval_loss(self):
-        loss = {
-            "total_loss": [],
-            "iou_loss": [],
-            "l1_loss": [],
-            "conf_loss": [],
-            "cls_loss": []
-        }
         for iter in range(self.max_val_iter):
             inps, targets = self.val_prefetcher.next()
             inps = inps.to(self.data_type)
             targets = targets.to(self.data_type)
             targets.requires_grad = False
             inps, targets = self.exp.preprocess(inps, targets, self.input_size)
-
             with torch.cuda.amp.autocast(enabled=self.amp_training):
                 outputs = self.model(inps, targets)
-
-            loss["total_loss"].append(outputs["total_loss"])
-            loss["iou_loss"].append(outputs["iou_loss"])
-            loss["l1_loss"].append(outputs["l1_loss"])
-            loss["conf_loss"].append(outputs["conf_loss"])
-            loss["cls_loss"].append(outputs["cls_loss"])
-
-        progress_str = "epoch: {}/{},".format(
-            self.epoch + 1, self.max_epoch
-        )
-
-        for loss_name, loss_value in loss.items():
-            progress_str += " {}: {:.1f}".format(loss_name, np.nanmean(loss_value))
-            self.neptune[f"loss/val/{loss_name}"].log(np.nanmean(loss_value))
-
-        logger.info("Validation:{}".format(progress_str))
+            loss = {
+                "total_loss": outputs["total_loss"],
+                "iou_loss": outputs["iou_loss"],
+                "l1_loss": outputs["l1_loss"],
+                "conf_loss": outputs["conf_loss"],
+                "cls_loss": outputs["cls_loss"]
+            }
+            progress_str = "epoch: {}/{}, iter: {}/{},".format(
+                self.epoch + 1, self.max_epoch, iter + 1, self.max_val_iter
+            )
+            for loss_name, loss_value in loss.items():
+                progress_str += " {}: {:.1f},".format(loss_name, loss_value)
+                self.neptune[f"loss/val/{loss_name}"].log(loss_value)
+            logger.info("Validation:{}".format(progress_str))

From 1c31df47418a12f29ec18fb3783380200ae16ef8 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-46-219.us-west-2.compute.internal>
Date: Tue, 24 May 2022 11:10:55 +0000
Subject: [PATCH 15/30] mosaic_prob !=1 bug fix

---
 yolox/data/datasets/mosaicdetection.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/yolox/data/datasets/mosaicdetection.py b/yolox/data/datasets/mosaicdetection.py
index 708babed5..25a0b0625 100644
--- a/yolox/data/datasets/mosaicdetection.py
+++ b/yolox/data/datasets/mosaicdetection.py
@@ -6,6 +6,7 @@
 
 import cv2
 import numpy as np
+import torch
 
 from yolox.utils import adjust_box_anns, get_local_rank
 
@@ -151,6 +152,7 @@ def __getitem__(self, idx):
             # img_info and img_id are not used for training.
             # They are also hard to be specified on a mosaic image.
             # -----------------------------------------------------------------
+            img_id = torch.tensor(np.array(img_id), dtype=torch.long)
             return mix_img, padded_labels, img_info, img_id
 
         else:

From c8fcf89f078706b0c6550b37b7cfbbbb9ae114ca Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-4-230.us-west-2.compute.internal>
Date: Tue, 31 May 2022 14:10:51 +0000
Subject: [PATCH 16/30] adding copy paste augmentations

---
 yolox/data/data_augment.py             | 39 ++++++++++++++++++++++++++
 yolox/data/datasets/mosaicdetection.py | 23 +++++++++++++--
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/yolox/data/data_augment.py b/yolox/data/data_augment.py
index 21cd7b56d..587c3b215 100644
--- a/yolox/data/data_augment.py
+++ b/yolox/data/data_augment.py
@@ -157,6 +157,45 @@ def preproc(img, input_size, swap=(2, 0, 1)):
     padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
     return padded_img, r
 
+def copy_paste(img, paste_img, labels, paste_labels, prob=0.5, obj_proc=0.5):
+    img_h, img_w = img.shape[:2]
+    paste_labels = paste_labels.astype(int)
+    objects_to_paste = paste_labels[random.sample(
+        range(0, len(paste_labels) - 1), int(len(paste_labels) * obj_proc)
+    )]
+    if len(objects_to_paste) == 0:
+        return img, labels
+    cropped_objects = {
+        idx: paste_img[object[1]:object[3], object[0]:object[2]] 
+        for idx, object in enumerate(objects_to_paste)
+    }
+    #50% chance to flip the object
+    for idx, obj in cropped_objects.items():
+        if random.random() > 0.5:
+            cropped_objects[idx] = obj[:,::-1]
+    new_coords = {
+        idx: (
+            random.randint(0, img_w - (object[2] - object[0])),
+            random.randint(0, img_h - (object[3] - object[1]))
+        )
+        for idx, object in enumerate(objects_to_paste)
+    }
+    new_labels = []
+    for idx, coords in new_coords.items():
+        new_labels.append(np.array([
+            coords[0], 
+            coords[1], 
+            coords[0] + objects_to_paste[idx][2]- objects_to_paste[idx][0],
+            coords[1] + objects_to_paste[idx][3]- objects_to_paste[idx][1],
+            objects_to_paste[idx][4]
+        ]))
+    for idx, object in enumerate(new_labels):
+        img[object[1]:object[3], object[0]:object[2]] = cropped_objects[idx]
+    labels = np.append(labels, new_labels, 0)
+    return img, labels
+    
+    
+
 
 class TrainTransform:
     def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0):
diff --git a/yolox/data/datasets/mosaicdetection.py b/yolox/data/datasets/mosaicdetection.py
index 25a0b0625..014ec4d3d 100644
--- a/yolox/data/datasets/mosaicdetection.py
+++ b/yolox/data/datasets/mosaicdetection.py
@@ -9,7 +9,7 @@
 import torch
 
 from yolox.utils import adjust_box_anns, get_local_rank
-
+from yolox.data.data_augment import copy_paste
 from ..data_augment import random_affine
 from .datasets_wrapper import Dataset
 
@@ -42,7 +42,8 @@ def __init__(
         self, dataset, img_size, mosaic=True, preproc=None,
         degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5),
         mixup_scale=(0.5, 1.5), shear=2.0, enable_mixup=True,
-        mosaic_prob=1.0, mixup_prob=1.0, *args
+        mosaic_prob=1.0, mixup_prob=1.0, copy_paste_prob=0.5,
+        copy_paste_obj_proc=0.5, *args
     ):
         """
 
@@ -65,6 +66,8 @@ def __init__(
         self.degrees = degrees
         self.translate = translate
         self.scale = mosaic_scale
+        self.copy_paste_prob = copy_paste_prob
+        self.copy_paste_obj_proc = copy_paste_obj_proc  # kept separate so copy_paste_prob is not overwritten
         self.shear = shear
         self.mixup_scale = mixup_scale
         self.enable_mosaic = mosaic
@@ -92,6 +95,14 @@ def __getitem__(self, idx):
 
             for i_mosaic, index in enumerate(indices):
                 img, _labels, _, img_id = self._dataset.pull_item(index)
+
+                if self.copy_paste_prob is not None and self.copy_paste_prob!=0.0:
+                    random_idx = index
+                    while random_idx==index:
+                        random_idx = random.randint(0, len(self._dataset.annotations)-1)
+                    paste_img, paste_label, _, _ = self._dataset.pull_item(random_idx)
+                    img, _labels = copy_paste(img, paste_img, _labels, paste_label, self.copy_paste_prob)
+
                 h0, w0 = img.shape[:2]  # orig hw
                 scale = min(1. * input_h / h0, 1. * input_w / w0)
                 img = cv2.resize(
@@ -158,6 +169,14 @@ def __getitem__(self, idx):
         else:
             self._dataset._input_dim = self.input_dim
             img, label, img_info, img_id = self._dataset.pull_item(idx)
+
+            if self.copy_paste_prob is not None and self.copy_paste_prob!=0.0:
+                random_idx = idx
+                while random_idx==idx:
+                    random_idx = random.randint(0, len(self._dataset.annotations)-1)
+                paste_img, paste_label, _, _ = self._dataset.pull_item(random_idx)
+                img, label = copy_paste(img, paste_img, label, paste_label, self.copy_paste_prob)
+
             img, label = self.preproc(img, label, self.input_dim)
             return img, label, img_info, img_id
 

From 3343d073834d06ef60d3ad45b5faceb7c7f870ae Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-4-230.us-west-2.compute.internal>
Date: Tue, 31 May 2022 14:48:10 +0000
Subject: [PATCH 17/30] refactor

---
 yolox/data/data_augment.py | 66 ++++++++++++++++++--------------------
 1 file changed, 31 insertions(+), 35 deletions(-)

diff --git a/yolox/data/data_augment.py b/yolox/data/data_augment.py
index 587c3b215..5952026d7 100644
--- a/yolox/data/data_augment.py
+++ b/yolox/data/data_augment.py
@@ -158,43 +158,39 @@ def preproc(img, input_size, swap=(2, 0, 1)):
     return padded_img, r
 
 def copy_paste(img, paste_img, labels, paste_labels, prob=0.5, obj_proc=0.5):
-    img_h, img_w = img.shape[:2]
-    paste_labels = paste_labels.astype(int)
-    objects_to_paste = paste_labels[random.sample(
-        range(0, len(paste_labels) - 1), int(len(paste_labels) * obj_proc)
-    )]
-    if len(objects_to_paste) == 0:
-        return img, labels
-    cropped_objects = {
-        idx: paste_img[object[1]:object[3], object[0]:object[2]] 
-        for idx, object in enumerate(objects_to_paste)
-    }
-    #50% chance to flip the object
-    for idx, obj in cropped_objects.items():
-        if random.random() > 0.5:
-            cropped_objects[idx] = obj[:,::-1]
-    new_coords = {
-        idx: (
-            random.randint(0, img_w - (object[2] - object[0])),
-            random.randint(0, img_h - (object[3] - object[1]))
-        )
-        for idx, object in enumerate(objects_to_paste)
-    }
-    new_labels = []
-    for idx, coords in new_coords.items():
-        new_labels.append(np.array([
-            coords[0], 
-            coords[1], 
-            coords[0] + objects_to_paste[idx][2]- objects_to_paste[idx][0],
-            coords[1] + objects_to_paste[idx][3]- objects_to_paste[idx][1],
-            objects_to_paste[idx][4]
-        ]))
-    for idx, object in enumerate(new_labels):
-        img[object[1]:object[3], object[0]:object[2]] = cropped_objects[idx]
-    labels = np.append(labels, new_labels, 0)
+    if random.random() < prob:  # paste with probability `prob`: 0 disables it, 1 always applies it
+        img_h, img_w = img.shape[:2]
+        paste_labels = paste_labels.astype(int)
+        objects_to_paste = paste_labels[random.sample(
+            range(len(paste_labels)), int(len(paste_labels) * obj_proc)  # every row is a candidate
+        )]
+        if len(objects_to_paste) == 0:
+            return img, labels
+        cropped_objects = [
+            paste_img[object[1]:object[3], object[0]:object[2]] 
+            for object in objects_to_paste
+        ]
+        #50% chance to flip the object
+        for idx, obj in enumerate(cropped_objects):
+            if random.random() > 0.5:
+                cropped_objects[idx] = obj[:,::-1]
+        new_labels = []
+        for idx, object in enumerate(objects_to_paste):
+            new_x = random.randint(0, img_w - (object[2] - object[0]))
+            new_y = random.randint(0, img_h - (object[3] - object[1]))
+            new_labels.append(np.array([
+                new_x,
+                new_y,
+                new_x + (object[2] - object[0]),
+                new_y + (object[3] - object[1]),
+                object[4]
+            ]))  
+        for idx, object in enumerate(new_labels):
+            img[object[1]:object[3], object[0]:object[2]] = cropped_objects[idx]
+        labels = np.append(labels, new_labels, 0)
+        breakpoint()
     return img, labels
     
-    
 
 
 class TrainTransform:

From 4ab2e63f96c03f8872e7093b0ea4966fa824a141 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-4-230.us-west-2.compute.internal>
Date: Wed, 1 Jun 2022 09:29:17 +0000
Subject: [PATCH 18/30] improving speed

---
 yolox/data/data_augment.py | 31 +++++++++++--------------------
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git a/yolox/data/data_augment.py b/yolox/data/data_augment.py
index 5952026d7..805ef876a 100644
--- a/yolox/data/data_augment.py
+++ b/yolox/data/data_augment.py
@@ -166,29 +166,20 @@ def copy_paste(img, paste_img, labels, paste_labels, prob=0.5, obj_proc=0.5):
         )]
         if len(objects_to_paste) == 0:
             return img, labels
-        cropped_objects = [
-            paste_img[object[1]:object[3], object[0]:object[2]] 
-            for object in objects_to_paste
-        ]
-        #50% chance to flip the object
-        for idx, obj in enumerate(cropped_objects):
-            if random.random() > 0.5:
-                cropped_objects[idx] = obj[:,::-1]
         new_labels = []
-        for idx, object in enumerate(objects_to_paste):
-            new_x = random.randint(0, img_w - (object[2] - object[0]))
-            new_y = random.randint(0, img_h - (object[3] - object[1]))
+        for obj in objects_to_paste:
+            cropped_obj = paste_img[obj[1]:obj[3], obj[0]:obj[2]]
+            if random.random() > 0.5:
+                cropped_obj = cropped_obj[:,::-1]
+            new_x_min = random.randint(0, img_w - (obj[2] - obj[0]))
+            new_y_min = random.randint(0, img_h - (obj[3] - obj[1]))
+            new_x_max = new_x_min + (obj[2] - obj[0])
+            new_y_max = new_y_min + (obj[3] - obj[1])
             new_labels.append(np.array([
-                new_x,
-                new_y,
-                new_x + (object[2] - object[0]),
-                new_y + (object[3] - object[1]),
-                object[4]
-            ]))  
-        for idx, object in enumerate(new_labels):
-            img[object[1]:object[3], object[0]:object[2]] = cropped_objects[idx]
+                new_x_min, new_y_min, new_x_max, new_y_max, obj[4]
+            ]))
+            img[new_y_min:new_y_max, new_x_min:new_x_max] = cropped_obj
         labels = np.append(labels, new_labels, 0)
-        breakpoint()
     return img, labels
     
 

From 1a8b0dced1d64ebb2d5fe0d09f9413535d162709 Mon Sep 17 00:00:00 2001
From: Dawid Stachowiak <dawid.stachowiak@pascal01.intra.deepsense.ai>
Date: Fri, 3 Jun 2022 14:20:41 +0200
Subject: [PATCH 19/30] added postprocessing

---
 yolox/utils/boxes.py | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/yolox/utils/boxes.py b/yolox/utils/boxes.py
index dbe10d957..f56bf0679 100644
--- a/yolox/utils/boxes.py
+++ b/yolox/utils/boxes.py
@@ -44,11 +44,17 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn
         if not image_pred.size(0):
             continue
         # Get score and class with highest confidence
-        class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True)
+        class_confs = image_pred[:, 5: 5 + num_classes]
+        top_confs, top_classes = torch.topk(class_confs, num_classes, 1, sorted=True)
+        class_conf = top_confs[:,0].unsqueeze(1)
+        class_pred = top_classes[:,0].unsqueeze(1)
 
-        conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze()
+        conf_mask = (image_pred[:, 4] * top_confs[:,0].squeeze() >= conf_thre).squeeze()
         # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
+        
         detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
+        
+        top_classes = top_classes[conf_mask]
         detections = detections[conf_mask]
         if not detections.size(0):
             continue
@@ -67,15 +73,36 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn
                 nms_thre,
             )
 
+        top_classes = top_classes[nms_out_index]
         detections = detections[nms_out_index]
+        detections = preprocess_double_class_instances(detections, top_classes)
+
         if output[i] is None:
             output[i] = detections
         else:
             output[i] = torch.cat((output[i], detections))
 
+
     return output
 
 
+def preprocess_double_class_instances(detections, top_classes):
+    """Make class ids unique within one image; the most confident detections choose first.
+
+    ``detections`` rows end in (class_conf, class_pred) and are edited in place; ``top_classes[i]``
+    ranks the class ids of detection i, best first. A detection with no free class keeps its own.
+    """
+    used_class = set()
+    order = sorted(range(len(detections)), key=lambda i: float(detections[i][-2]), reverse=True)
+    for idx in order:
+        # Candidates: the current class first, then this detection's lower-ranked classes.
+        ranked = [int(detections[idx][-1])] + [int(c) for c in top_classes[idx][1:]]
+        class_id = next((c for c in ranked if c not in used_class), ranked[0])
+        used_class.add(class_id)  # record reassigned classes too, so later rows cannot reuse them
+        detections[idx][-1] = float(class_id)
+    return detections
+                
+
 def bboxes_iou(bboxes_a, bboxes_b, xyxy=True):
     if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
         raise IndexError

From d7a1ad197c427a08dce48fbabd7cec4ab901e2c5 Mon Sep 17 00:00:00 2001
From: Dawid Stachowiak <dawid.stachowiak@pascal01.intra.deepsense.ai>
Date: Wed, 15 Jun 2022 13:51:11 +0200
Subject: [PATCH 20/30] changed integration from neptune to mlflow

---
 tools/train.py            | 36 +++++++++++++++++++-----------------
 yolox/core/trainer.py     | 16 ++++++++--------
 yolox/exp/base_exp.py     | 21 +++++----------------
 yolox/utils/checkpoint.py |  8 ++++----
 4 files changed, 36 insertions(+), 45 deletions(-)

diff --git a/tools/train.py b/tools/train.py
index cfbf19954..953eb66d6 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -5,6 +5,7 @@
 import argparse
 import random
 import warnings
+import mlflow
 from loguru import logger
 
 import yaml
@@ -104,7 +105,7 @@ def make_parser():
 
 
 @logger.catch
-def main(exp, args):
+def main(exp, run, args):
     if exp.seed is not None:
         random.seed(exp.seed)
         torch.manual_seed(exp.seed)
@@ -120,17 +121,15 @@ def main(exp, args):
     configure_omp()
     cudnn.benchmark = True
 
-    trainer = Trainer(exp, args)
+    trainer = Trainer(exp, run, args)
     trainer.train()
 
 
 if __name__ == "__main__":
     args = make_parser().parse_args()
     exp = get_exp(args.exp_file, args.name)
-
-    #TODO: Add neptune logging with multidevice training. Logging now works only 
-    # on 1 gpu device training, not working with multiprocessing.
-    exp.set_neptune_logging(True)
+    mlflow.set_tracking_uri('http://127.0.0.1:5000')
+    run = mlflow.start_run()
 
     exp.merge(args.opts)
 
@@ -140,18 +139,21 @@ def main(exp, args):
     if args.config_filepath is not None:
         with open(args.config_filepath, "r") as f:
             config = yaml.safe_load(f)
-        exp.add_params_from_config(config, use_neptune=True)
-        exp.neptune['config_file'].upload(args.config_filepath)
     num_gpu = get_num_devices() if args.devices is None else args.devices
     assert num_gpu <= get_num_devices()
 
     dist_url = "auto" if args.dist_url is None else args.dist_url
-    launch(
-        main,
-        num_gpu,
-        args.num_machines,
-        args.machine_rank,
-        backend=args.dist_backend,
-        dist_url=dist_url,
-        args=(exp, args),
-    )
+    with run:
+        if args.config_filepath is not None:
+            mlflow.log_artifact(args.config_filepath, 'config_file')
+            exp.run = run
+            exp.add_params_from_config(config, use_mlflow=True)
+        launch(
+            main,
+            num_gpu,
+            args.num_machines,
+            args.machine_rank,
+            backend=args.dist_backend,
+            dist_url=dist_url,
+            args=(exp, run, args),
+        )
diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index 215e47c5d..2535429ef 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -5,6 +5,7 @@
 import datetime
 import os
 import time
+import mlflow
 from loguru import logger
 
 import torch
@@ -33,12 +34,12 @@
 
 
 class Trainer:
-    def __init__(self, exp, args):
+    def __init__(self, exp, run, args):
         # init function only defines some basic attr, other attrs like model, optimizer are built in
         # before_train methods.
         self.exp = exp
         self.args = args
-        self.neptune = self.exp.neptune
+        self.run = run
         # training related attr
         self.max_epoch = exp.max_epoch
         self.amp_training = args.fp16
@@ -117,7 +118,7 @@ def train_one_iter(self):
             self.ema_model.update(self.model)
 
         lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1)
-        self.neptune['config/lr'].log(lr)
+        mlflow.log_metric("lr", lr)
         for param_group in self.optimizer.param_groups:
             param_group["lr"] = lr
 
@@ -259,7 +260,7 @@ def after_iter(self):
                 ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()]
             )
             for loss_name, loss_value in loss_meter.items():
-                self.neptune[f"loss/{loss_name}"].log(loss_value.latest)
+                mlflow.log_metric(f"loss/{loss_name}", loss_value.latest)
             time_meter = self.meter.get_filtered_meter("time")
             time_str = ", ".join(
                 ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]
@@ -347,7 +348,7 @@ def evaluate_and_save_model(self):
 
         update_best_ckpt = ap50_95 > self.best_ap
         self.best_ap = max(self.best_ap, ap50_95)
-        self.neptune['metrics/best_ap'].log(self.best_ap)
+        mlflow.log_metric(f"metrics/best_ap", self.best_ap)
         if self.rank == 0:
             if self.args.logger == "tensorboard":
                 self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
@@ -380,9 +381,8 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False):
                 update_best_ckpt,
                 self.file_name,
                 ckpt_name,
-                self.neptune,
+                self.run,
             )
-
             if self.args.logger == "wandb":
                 self.wandb_logger.save_checkpoint(self.file_name, ckpt_name, update_best_ckpt)
 
@@ -407,5 +407,5 @@ def calculate_eval_loss(self):
             )
             for loss_name, loss_value in loss.items():
                 progress_str += " {}: {:.1f},".format(loss_name, loss_value)
-                self.neptune[f"loss/val/{loss_name}"].log(loss_value)
+                mlflow.log_metric(f"loss/val/{loss_name}", loss_value)
             logger.info("Validation:{}".format(progress_str))
diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py
index 1aabd0d51..789a41590 100644
--- a/yolox/exp/base_exp.py
+++ b/yolox/exp/base_exp.py
@@ -6,9 +6,9 @@
 import pprint
 from abc import ABCMeta, abstractmethod
 from typing import Dict
+import mlflow
 from tabulate import tabulate
 
-import neptune.new as neptune
 import torch
 from torch.nn import Module
 
@@ -23,7 +23,7 @@ def __init__(self):
         self.output_dir = "./YOLOX_outputs"
         self.print_interval = 100
         self.eval_interval = 10
-        self.neptune = None
+        self.run = None
 
     @abstractmethod
     def get_model(self) -> Module:
@@ -62,17 +62,6 @@ def __repr__(self):
         ]
         return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid")
 
-    def set_neptune_logging(self, state):
-        if state:
-            self.neptune = neptune.init(
-            project="jakub.pingielski/b-yond",
-            api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI2NTlkYzZmZC1kZTY5LTQ2NjMtODFkZC04YmY4NTNmYTkwMTIifQ==",
-        )
-        else:
-            if self.neptune is not None:
-                self.neptune.stop()
-            self.neptune = None
-
     def merge(self, cfg_list):
         assert len(cfg_list) % 2 == 0
         for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
@@ -87,13 +76,13 @@ def merge(self, cfg_list):
                         v = ast.literal_eval(v)
                 setattr(self, k, v)
 
-    def add_params_from_config(self, config: dict, use_neptune: bool = True):
+    def add_params_from_config(self, config: dict, use_mlflow: bool = False):
         for key, value in config.items():
             if key == "dataset_version":
                 setattr(self, "dataset_dir", DATASETS_PATH / value)
             else:
                 setattr(self, key, value)
-            if use_neptune and self.neptune:
-                self.neptune[f"config/{key}"].log(value)
+            if use_mlflow and self.run:
+                mlflow.log_param(key, value)
 
 
diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py
index e7d732bef..8a8997ad4 100644
--- a/yolox/utils/checkpoint.py
+++ b/yolox/utils/checkpoint.py
@@ -4,7 +4,7 @@
 import os
 import shutil
 from loguru import logger
-import neptune.new as neptune
+import mlflow
 
 import torch
 
@@ -34,7 +34,7 @@ def load_ckpt(model, ckpt):
     return model
 
 
-def save_checkpoint(state, is_best, save_dir, model_name="", neptune=None):
+def save_checkpoint(state, is_best, save_dir, model_name="", run=None):
     if not os.path.exists(save_dir):
         os.makedirs(save_dir)
     filename = os.path.join(save_dir, model_name + "_ckpt.pth")
@@ -42,5 +42,5 @@ def save_checkpoint(state, is_best, save_dir, model_name="", neptune=None):
     if is_best:
         best_filename = os.path.join(save_dir, "best_ckpt.pth")
         shutil.copyfile(filename, best_filename)
-        if neptune:
-            neptune['best_checkpoint'].upload(best_filename)
+        if run:
+            mlflow.log_artifact(best_filename, 'best_checkpoint')

From b2f55a006dde3f8efecb11a60ae002f72fea68df Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-4-230.us-west-2.compute.internal>
Date: Thu, 23 Jun 2022 10:35:24 +0000
Subject: [PATCH 21/30] added exp connection

---
 tools/train.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/train.py b/tools/train.py
index 953eb66d6..7f19a79b6 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -128,7 +128,8 @@ def main(exp, run, args):
 if __name__ == "__main__":
     args = make_parser().parse_args()
     exp = get_exp(args.exp_file, args.name)
-    mlflow.set_tracking_uri('http://127.0.0.1:5000')
+    mlflow.set_tracking_uri('http://localhost:5000')
+    mlflow.set_experiment('phoenix-suns-vz-ar')
     run = mlflow.start_run()
 
     exp.merge(args.opts)

From 49b7d4b14e68026a836c9c275e761b41e1296412 Mon Sep 17 00:00:00 2001
From: Dawid Stachowiak <dawid.stachowiak@pascal01.intra.deepsense.ai>
Date: Thu, 30 Jun 2022 12:15:49 +0200
Subject: [PATCH 22/30] parametrized mlflow connection

---
 tools/train.py        | 22 +++++++++++++++++-----
 yolox/core/trainer.py | 12 ++++++++----
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/tools/train.py b/tools/train.py
index 953eb66d6..577633411 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -101,6 +101,13 @@ def make_parser():
         default=None,
         nargs=argparse.REMAINDER,
     )
+    parser.add_argument(
+        "-ml",
+        "--mlflow-url",
+        type=str,
+        help="MLFlow instance url for logging metrics and files.",
+        default=None
+    )
     return parser
 
 
@@ -128,8 +135,9 @@ def main(exp, run, args):
 if __name__ == "__main__":
     args = make_parser().parse_args()
     exp = get_exp(args.exp_file, args.name)
-    mlflow.set_tracking_uri('http://127.0.0.1:5000')
-    run = mlflow.start_run()
+    if args.mlflow_url is not None:
+        mlflow.set_tracking_uri(args.mlflow_url)
+        run = mlflow.start_run()
 
     exp.merge(args.opts)
 
@@ -145,9 +153,13 @@ def main(exp, run, args):
     dist_url = "auto" if args.dist_url is None else args.dist_url
     with run:
         if args.config_filepath is not None:
-            mlflow.log_artifact(args.config_filepath, 'config_file')
-            exp.run = run
-            exp.add_params_from_config(config, use_mlflow=True)
+            run = None
+            if args.mlflow_url is not None:
+                mlflow.log_artifact(args.config_filepath, 'config_file')
+                exp.run = run
+                exp.add_params_from_config(config, use_mlflow=True)
+            else:
+                exp.add_params_from_config(config)
         launch(
             main,
             num_gpu,
diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index 2535429ef..c4da37e4e 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -118,7 +118,8 @@ def train_one_iter(self):
             self.ema_model.update(self.model)
 
         lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1)
-        mlflow.log_metric("lr", lr)
+        if self.run is not None:
+            mlflow.log_metric("lr", lr)
         for param_group in self.optimizer.param_groups:
             param_group["lr"] = lr
 
@@ -260,7 +261,8 @@ def after_iter(self):
                 ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()]
             )
             for loss_name, loss_value in loss_meter.items():
-                mlflow.log_metric(f"loss/{loss_name}", loss_value.latest)
+                if self.run is not None:
+                    mlflow.log_metric(f"loss/{loss_name}", loss_value.latest)
             time_meter = self.meter.get_filtered_meter("time")
             time_str = ", ".join(
                 ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]
@@ -348,7 +350,8 @@ def evaluate_and_save_model(self):
 
         update_best_ckpt = ap50_95 > self.best_ap
         self.best_ap = max(self.best_ap, ap50_95)
-        mlflow.log_metric(f"metrics/best_ap", self.best_ap)
+        if self.run is not None:
+            mlflow.log_metric(f"metrics/best_ap", self.best_ap)
         if self.rank == 0:
             if self.args.logger == "tensorboard":
                 self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1)
@@ -407,5 +410,6 @@ def calculate_eval_loss(self):
             )
             for loss_name, loss_value in loss.items():
                 progress_str += " {}: {:.1f},".format(loss_name, loss_value)
-                mlflow.log_metric(f"loss/val/{loss_name}", loss_value)
+                if self.run is not None:
+                    mlflow.log_metric(f"loss/val/{loss_name}", loss_value)
             logger.info("Validation:{}".format(progress_str))

From dd91165f6d636270fb048f22d58c29cc2505b799 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-4-230.us-west-2.compute.internal>
Date: Thu, 30 Jun 2022 13:19:29 +0000
Subject: [PATCH 23/30] added configurable exp name

---
 tools/train.py | 56 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 19 deletions(-)

diff --git a/tools/train.py b/tools/train.py
index 577633411..c573258ba 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -103,11 +103,18 @@ def make_parser():
     )
     parser.add_argument(
         "-ml",
-        "--mlflow-url",
+        "--mlflow_url",
         type=str,
         help="MLFlow instance url for logging metrics and files.",
         default=None
     )
+    parser.add_argument(
+        "-mlex",
+        "--mlflow_experiment_name",
+        type=str,
+        help="Experiment name to log metrics and files",
+        default=None
+    )
     return parser
 
 
@@ -135,9 +142,11 @@ def main(exp, run, args):
 if __name__ == "__main__":
     args = make_parser().parse_args()
     exp = get_exp(args.exp_file, args.name)
-    if args.mlflow_url is not None:
+    run = None
+    if args.mlflow_url is not None and args.mlflow_experiment_name is not None:
         mlflow.set_tracking_uri(args.mlflow_url)
-        run = mlflow.start_run()
+        experiment = mlflow.get_experiment_by_name(args.mlflow_experiment_name)
+        run = mlflow.start_run(experiment_id=experiment.experiment_id)
 
     exp.merge(args.opts)
 
@@ -151,21 +160,30 @@ def main(exp, run, args):
     assert num_gpu <= get_num_devices()
 
     dist_url = "auto" if args.dist_url is None else args.dist_url
-    with run:
+    if run is not None:
+        with run:
+            if args.config_filepath is not None:
+                    mlflow.log_artifact(args.config_filepath, 'config_file')
+                    exp.run = run
+                    exp.add_params_from_config(config, use_mlflow=True)
+            launch(
+                main,
+                num_gpu,
+                args.num_machines,
+                args.machine_rank,
+                backend=args.dist_backend,
+                dist_url=dist_url,
+                args=(exp, run, args),
+            )
+    else:
         if args.config_filepath is not None:
-            run = None
-            if args.mlflow_url is not None:
-                mlflow.log_artifact(args.config_filepath, 'config_file')
-                exp.run = run
-                exp.add_params_from_config(config, use_mlflow=True)
-            else:
-                exp.add_params_from_config(config)
+            exp.add_params_from_config(config)
         launch(
-            main,
-            num_gpu,
-            args.num_machines,
-            args.machine_rank,
-            backend=args.dist_backend,
-            dist_url=dist_url,
-            args=(exp, run, args),
-        )
+                main,
+                num_gpu,
+                args.num_machines,
+                args.machine_rank,
+                backend=args.dist_backend,
+                dist_url=dist_url,
+                args=(exp, run, args),
+            )

From 58e19c001cfdb90a17f9b0aedf2ced1152518f91 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-46-219.us-west-2.compute.internal>
Date: Mon, 4 Jul 2022 13:50:06 +0000
Subject: [PATCH 24/30] changes for classes by config refactor

---
 yolox/core/trainer.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index c4da37e4e..a05564385 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -261,8 +261,16 @@ def after_iter(self):
                 ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()]
             )
             for loss_name, loss_value in loss_meter.items():
+<<<<<<< Updated upstream
+<<<<<<< Updated upstream
                 if self.run is not None:
                     mlflow.log_metric(f"loss/{loss_name}", loss_value.latest)
+=======
+                self.neptune[loss_name].log(loss_value.latest)
+>>>>>>> Stashed changes
+=======
+                self.neptune[loss_name].log(loss_value.latest)
+>>>>>>> Stashed changes
             time_meter = self.meter.get_filtered_meter("time")
             time_str = ", ".join(
                 ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]

From 01279e1919c28f88a141a00170253459616717f1 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-46-219.us-west-2.compute.internal>
Date: Mon, 4 Jul 2022 14:01:06 +0000
Subject: [PATCH 25/30] repair bug: remove leftover merge conflict markers

---
 yolox/core/trainer.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py
index a05564385..c4da37e4e 100644
--- a/yolox/core/trainer.py
+++ b/yolox/core/trainer.py
@@ -261,16 +261,8 @@ def after_iter(self):
                 ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()]
             )
             for loss_name, loss_value in loss_meter.items():
-<<<<<<< Updated upstream
-<<<<<<< Updated upstream
                 if self.run is not None:
                     mlflow.log_metric(f"loss/{loss_name}", loss_value.latest)
-=======
-                self.neptune[loss_name].log(loss_value.latest)
->>>>>>> Stashed changes
-=======
-                self.neptune[loss_name].log(loss_value.latest)
->>>>>>> Stashed changes
             time_meter = self.meter.get_filtered_meter("time")
             time_str = ", ".join(
                 ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()]

From b9315019666831992772e2fe52ff9ae4fdc80f0f Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-46-219.us-west-2.compute.internal>
Date: Tue, 5 Jul 2022 12:54:01 +0000
Subject: [PATCH 26/30] fixes regarding training

---
 tools/train.py        | 6 +++---
 yolox/exp/base_exp.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/train.py b/tools/train.py
index c573258ba..6de7d7155 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -163,9 +163,9 @@ def main(exp, run, args):
     if run is not None:
         with run:
             if args.config_filepath is not None:
-                    mlflow.log_artifact(args.config_filepath, 'config_file')
-                    exp.run = run
-                    exp.add_params_from_config(config, use_mlflow=True)
+                mlflow.log_artifact(args.config_filepath, 'config_file')
+                exp.run = run
+                exp.add_params_from_config(config, use_mlflow=True)
             launch(
                 main,
                 num_gpu,
diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py
index 789a41590..833045b45 100644
--- a/yolox/exp/base_exp.py
+++ b/yolox/exp/base_exp.py
@@ -82,7 +82,7 @@ def add_params_from_config(self, config: dict, use_mlflow: bool = False):
                 setattr(self, "dataset_dir", DATASETS_PATH / value)
             else:
                 setattr(self, key, value)
-            if use_mlflow and self.run:
+            if use_mlflow and self.run and key != "classes_mapping":
                 mlflow.log_param(key, value)
 
 

From be92108a25dedf1abbc3a378cb7241711f52053f Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-46-219.us-west-2.compute.internal>
Date: Tue, 5 Jul 2022 13:22:25 +0000
Subject: [PATCH 27/30] fix errors in true divide

---
 yolox/evaluators/voc_eval.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/yolox/evaluators/voc_eval.py b/yolox/evaluators/voc_eval.py
index a9e85d3fc..35a13ee86 100644
--- a/yolox/evaluators/voc_eval.py
+++ b/yolox/evaluators/voc_eval.py
@@ -10,6 +10,7 @@
 import xml.etree.ElementTree as ET
 
 import numpy as np
+np.seterr(invalid='ignore')
 
 
 def parse_rec(filename):

From e7e9a23bbaee35ba8df57e03ac82256b891283f2 Mon Sep 17 00:00:00 2001
From: Aditya-Bobade <aditya.bobade@b-yond.com>
Date: Tue, 5 Jul 2022 20:49:05 +0530
Subject: [PATCH 28/30] install specific library version

---
 requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 05a655866..712c0c5c0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,11 @@
 # TODO: Update with exact module version
 numpy
-torch>=1.7
+torch==1.11.0
 opencv_python
 loguru
 scikit-image
 tqdm
-torchvision
+torchvision==0.12.0
 Pillow
 thop
 ninja

From 7c346adc819bbf35e351d1635e48e159a731c831 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-46-219.us-west-2.compute.internal>
Date: Wed, 6 Jul 2022 13:05:10 +0000
Subject: [PATCH 29/30] fixing returning only one instance of class

---
 yolox/utils/boxes.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/yolox/utils/boxes.py b/yolox/utils/boxes.py
index f56bf0679..30920f310 100644
--- a/yolox/utils/boxes.py
+++ b/yolox/utils/boxes.py
@@ -46,6 +46,7 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn
         # Get score and class with highest confidence
         class_confs = image_pred[:, 5: 5 + num_classes]
         top_confs, top_classes = torch.topk(class_confs, num_classes, 1, sorted=True)
+
         class_conf = top_confs[:,0].unsqueeze(1)
         class_pred = top_classes[:,0].unsqueeze(1)
 
@@ -54,7 +55,7 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn
         
         detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
         
-        top_classes = top_classes[conf_mask]
+        top_classes = class_pred[conf_mask]
         detections = detections[conf_mask]
         if not detections.size(0):
             continue
@@ -75,7 +76,7 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn
 
         top_classes = top_classes[nms_out_index]
         detections = detections[nms_out_index]
-        detections = preprocess_double_class_instances(detections, top_classes)
+        detections = process_double_class_instances(detections, top_classes)
 
         if output[i] is None:
             output[i] = detections
@@ -86,21 +87,19 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn
     return output
 
 
-def preprocess_double_class_instances(detections, top_classes):
+def process_double_class_instances(detections, top_classes):
     used_class = []
+    processed_detections = []
+    unique_classes = top_classes.unique()
     sorted_dets = sorted(enumerate(detections), key=lambda x:x[1][-2], reverse=True)
     for idx, det in sorted_dets:
         class_id = int(det[-1])
         if class_id not in used_class:
             used_class.append(int(det[-1]))
-            continue
-        else:
-            idx_counter = 0
-            while class_id in used_class:
-                idx_counter += 1
-                class_id = int(top_classes[idx][idx_counter])
-            detections[idx][-1] = float(class_id)
-    return detections
+            processed_detections.append(detections[idx])
+        if len(used_class) == len(unique_classes):
+            break
+    return torch.stack(processed_detections)
                 
 
 def bboxes_iou(bboxes_a, bboxes_b, xyxy=True):

From 2d8bce3b49be83ee1dfa6909c1e017b4e6ca58f9 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-46-219.us-west-2.compute.internal>
Date: Tue, 19 Jul 2022 11:43:21 +0000
Subject: [PATCH 30/30] reverts changes regarding postprocess (moved to end of
 pipeline)

---
 yolox/utils/boxes.py | 30 ++----------------------------
 1 file changed, 2 insertions(+), 28 deletions(-)

diff --git a/yolox/utils/boxes.py b/yolox/utils/boxes.py
index 30920f310..17d6c9eec 100644
--- a/yolox/utils/boxes.py
+++ b/yolox/utils/boxes.py
@@ -44,18 +44,11 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn
         if not image_pred.size(0):
             continue
         # Get score and class with highest confidence
-        class_confs = image_pred[:, 5: 5 + num_classes]
-        top_confs, top_classes = torch.topk(class_confs, num_classes, 1, sorted=True)
+        class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True)
 
-        class_conf = top_confs[:,0].unsqueeze(1)
-        class_pred = top_classes[:,0].unsqueeze(1)
-
-        conf_mask = (image_pred[:, 4] * top_confs[:,0].squeeze() >= conf_thre).squeeze()
+        conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze()
         # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
-        
         detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1)
-        
-        top_classes = class_pred[conf_mask]
         detections = detections[conf_mask]
         if not detections.size(0):
             continue
@@ -74,32 +67,13 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn
                 nms_thre,
             )
 
-        top_classes = top_classes[nms_out_index]
         detections = detections[nms_out_index]
-        detections = process_double_class_instances(detections, top_classes)
-
         if output[i] is None:
             output[i] = detections
         else:
             output[i] = torch.cat((output[i], detections))
 
-
     return output
-
-
-def process_double_class_instances(detections, top_classes):
-    used_class = []
-    processed_detections = []
-    unique_classes = top_classes.unique()
-    sorted_dets = sorted(enumerate(detections), key=lambda x:x[1][-2], reverse=True)
-    for idx, det in sorted_dets:
-        class_id = int(det[-1])
-        if class_id not in used_class:
-            used_class.append(int(det[-1]))
-            processed_detections.append(detections[idx])
-        if len(used_class) == len(unique_classes):
-            break
-    return torch.stack(processed_detections)
                 
 
 def bboxes_iou(bboxes_a, bboxes_b, xyxy=True):