From c375b93315565a9f2a7e22d959b359b339761b89 Mon Sep 17 00:00:00 2001 From: Jakub Date: Mon, 9 May 2022 11:54:08 +0200 Subject: [PATCH 01/30] Added support for config --- tools/train.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/train.py b/tools/train.py index b69066fa6..51a6baca4 100644 --- a/tools/train.py +++ b/tools/train.py @@ -41,6 +41,12 @@ def make_parser(): type=str, help="plz input your experiment description file", ) + parser.add_argument( + "--config_filepath", + default=None, + type=str, + help="Filepath to config file", + ) parser.add_argument( "--resume", default=False, action="store_true", help="resume training" ) From c03d5bab9333771c6ba798530a2b66fdec69c442 Mon Sep 17 00:00:00 2001 From: Jakub Date: Mon, 9 May 2022 12:26:05 +0200 Subject: [PATCH 02/30] Added neptune integration --- tools/train.py | 6 ++++++ yolox/core/trainer.py | 2 +- yolox/exp/base_exp.py | 14 +++++++++++++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/tools/train.py b/tools/train.py index 51a6baca4..9195fb15e 100644 --- a/tools/train.py +++ b/tools/train.py @@ -7,6 +7,7 @@ import warnings from loguru import logger +import yaml import torch import torch.backends.cudnn as cudnn @@ -131,6 +132,11 @@ def main(exp, args): if not args.experiment_name: args.experiment_name = exp.exp_name + if args.config_filepath is not None: + with open(args.config_filepath, "r") as f: + config = yaml.safe_load(f) + exp.add_params_from_config(config, use_neptune=True) + num_gpu = get_num_devices() if args.devices is None else args.devices assert num_gpu <= get_num_devices() diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index a9ee2a681..b0b0afeb3 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -38,7 +38,7 @@ def __init__(self, exp, args): # before_train methods. self.exp = exp self.args = args - + self.neptune = self.exp.neptune # training related attr self.max_epoch = exp.max_epoch self.amp_training = args.fp16 diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py index e26ae079c..127c5de07 100644 --- a/yolox/exp/base_exp.py +++ b/yolox/exp/base_exp.py @@ -8,6 +8,7 @@ from typing import Dict from tabulate import tabulate +import neptune.new as neptune import torch from torch.nn import Module @@ -22,7 +23,10 @@ def __init__(self): self.output_dir = "./YOLOX_outputs" self.print_interval = 100 self.eval_interval = 10 - + self.neptune = neptune.init( + project="jakub.pingielski/b-yond", + api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI2NTlkYzZmZC1kZTY5LTQ2NjMtODFkZC04YmY4NTNmYTkwMTIifQ==", + ) @abstractmethod def get_model(self) -> Module: pass @@ -73,3 +77,11 @@ def merge(self, cfg_list): except Exception: v = ast.literal_eval(v) setattr(self, k, v) + + def add_params_from_config(self, config: dict, use_neptune: bool = True): + for key, value in config.items(): + setattr(self, key, value) + if use_neptune and self.neptune: + self.neptune[f"config/{key}"].log(value) + + From c56359c040db0de33ce0ad4ec9d8f0f114c6b257 Mon Sep 17 00:00:00 2001 From: Jakub Date: Tue, 10 May 2022 11:30:41 +0200 Subject: [PATCH 03/30] artifact logging --- tools/train.py | 2 +- yolox/core/trainer.py | 2 ++ yolox/exp/yolox_base.py | 2 +- yolox/utils/checkpoint.py | 7 ++++++- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/train.py b/tools/train.py index 9195fb15e..abdb55f08 100644 --- a/tools/train.py +++ b/tools/train.py @@ -136,7 +136,7 @@ def main(exp, args): with open(args.config_filepath, "r") as f: config = yaml.safe_load(f) exp.add_params_from_config(config, use_neptune=True) - + exp.neptune.log_artifact(args.config_filepath) num_gpu = get_num_devices() if args.devices is None else args.devices assert num_gpu <= get_num_devices() diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index b0b0afeb3..1b79c53fb 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -364,3 +364,5 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False): if self.args.logger == "wandb": self.wandb_logger.save_checkpoint(self.file_name, ckpt_name, update_best_ckpt) + if self.neptune: + self.neptune.log_artefact() diff --git a/yolox/exp/yolox_base.py b/yolox/exp/yolox_base.py index 611b25825..5d0496f49 100644 --- a/yolox/exp/yolox_base.py +++ b/yolox/exp/yolox_base.py @@ -94,7 +94,7 @@ def __init__(self): self.eval_interval = 10 # save history checkpoint or not. # If set to False, yolox will only save latest and best ckpt. - self.save_history_ckpt = True + self.save_history_ckpt = False # name of experiment self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py index a0c200e41..2d6fa4226 100644 --- a/yolox/utils/checkpoint.py +++ b/yolox/utils/checkpoint.py @@ -4,6 +4,7 @@ import os import shutil from loguru import logger +import neptune.new as neptune import torch @@ -33,7 +34,7 @@ def load_ckpt(model, ckpt): return model -def save_checkpoint(state, is_best, save_dir, model_name=""): +def save_checkpoint(state, is_best, save_dir, model_name, neptune): if not os.path.exists(save_dir): os.makedirs(save_dir) filename = os.path.join(save_dir, model_name + "_ckpt.pth") @@ -41,3 +42,7 @@ def save_checkpoint(state, is_best, save_dir, model_name=""): if is_best: best_filename = os.path.join(save_dir, "best_ckpt.pth") shutil.copyfile(filename, best_filename) + if neptune: + neptune.log_artifact(best_filename) + + From a0f589c505fa8e1164e42b442ecf5e1a35ecfe9a Mon Sep 17 00:00:00 2001 From: Jakub Date: Tue, 10 May 2022 11:43:44 +0200 Subject: [PATCH 04/30] artifact logging --- tools/train.py | 1 + yolox/core/trainer.py | 4 ++-- yolox/utils/checkpoint.py | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/train.py b/tools/train.py index abdb55f08..9a833d03a 100644 --- a/tools/train.py +++ b/tools/train.py @@ -137,6 +137,7 @@ def main(exp, args): config = yaml.safe_load(f) exp.add_params_from_config(config, use_neptune=True) exp.neptune.log_artifact(args.config_filepath) + exp.neptune['config'].track_files(args.config_filepath) num_gpu = get_num_devices() if args.devices is None else args.devices assert num_gpu <= get_num_devices() diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index 1b79c53fb..3416a85fe 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -360,9 +360,9 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False): update_best_ckpt, self.file_name, ckpt_name, + self.neptune, ) if self.args.logger == "wandb": self.wandb_logger.save_checkpoint(self.file_name, ckpt_name, update_best_ckpt) - if self.neptune: - self.neptune.log_artefact() + diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py index 2d6fa4226..74995d038 100644 --- a/yolox/utils/checkpoint.py +++ b/yolox/utils/checkpoint.py @@ -44,5 +44,7 @@ def save_checkpoint(state, is_best, save_dir, model_name, neptune): shutil.copyfile(filename, best_filename) if neptune: neptune.log_artifact(best_filename) + neptune['best_checkpoint'].track_files(best_filename) + From dbc43f89a80a200e7b1377b7dc58c78115efa39c Mon Sep 17 00:00:00 2001 From: Jakub Date: Tue, 10 May 2022 11:53:13 +0200 Subject: [PATCH 05/30] artifact logging --- tools/train.py | 2 +- yolox/utils/checkpoint.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/train.py b/tools/train.py index 9a833d03a..46eeff52f 100644 --- a/tools/train.py +++ b/tools/train.py @@ -136,7 +136,7 @@ def main(exp, args): with open(args.config_filepath, "r") as f: config = yaml.safe_load(f) exp.add_params_from_config(config, use_neptune=True) - exp.neptune.log_artifact(args.config_filepath) + print("saving config from", args.config_filepath) exp.neptune['config'].track_files(args.config_filepath) num_gpu = get_num_devices() if args.devices is None else args.devices assert num_gpu <= get_num_devices() diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py index 74995d038..5595844bf 100644 --- a/yolox/utils/checkpoint.py +++ b/yolox/utils/checkpoint.py @@ -42,9 +42,8 @@ def save_checkpoint(state, is_best, save_dir, model_name, neptune): if is_best: best_filename = os.path.join(save_dir, "best_ckpt.pth") shutil.copyfile(filename, best_filename) - if neptune: - neptune.log_artifact(best_filename) - neptune['best_checkpoint'].track_files(best_filename) + print("saving best checkpoint to ", best_filename) + neptune['best_checkpoint'].track_files(best_filename) From 3fe9cd281160e99d388cc2faa6e926e28680a73c Mon Sep 17 00:00:00 2001 From: Jakub Date: Tue, 10 May 2022 13:43:22 +0200 Subject: [PATCH 06/30] refactor --- tools/train.py | 1 - yolox/utils/checkpoint.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tools/train.py b/tools/train.py index 46eeff52f..a7d40028d 100644 --- a/tools/train.py +++ b/tools/train.py @@ -136,7 +136,6 @@ def main(exp, args): with open(args.config_filepath, "r") as f: config = yaml.safe_load(f) exp.add_params_from_config(config, use_neptune=True) - print("saving config from", args.config_filepath) exp.neptune['config'].track_files(args.config_filepath) num_gpu = get_num_devices() if args.devices is None else args.devices assert num_gpu <= get_num_devices() diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py index 5595844bf..00ae8c5f3 100644 --- a/yolox/utils/checkpoint.py +++ b/yolox/utils/checkpoint.py @@ -42,7 +42,6 @@ def save_checkpoint(state, is_best, save_dir, model_name, neptune): if is_best: best_filename = os.path.join(save_dir, "best_ckpt.pth") shutil.copyfile(filename, best_filename) - print("saving best checkpoint to ", best_filename) neptune['best_checkpoint'].track_files(best_filename) From 586d3ede0c2de483bbd9838a2ef45080272187bb Mon Sep 17 00:00:00 2001 From: Jakub Date: Tue, 10 May 2022 14:06:50 +0200 Subject: [PATCH 07/30] refactor --- yolox/utils/checkpoint.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py index 00ae8c5f3..50da303a6 100644 --- a/yolox/utils/checkpoint.py +++ b/yolox/utils/checkpoint.py @@ -34,7 +34,7 @@ def load_ckpt(model, ckpt): return model -def save_checkpoint(state, is_best, save_dir, model_name, neptune): +def save_checkpoint(state, is_best, save_dir, model_name="", neptune=None): if not os.path.exists(save_dir): os.makedirs(save_dir) filename = os.path.join(save_dir, model_name + "_ckpt.pth") @@ -42,7 +42,5 @@ def save_checkpoint(state, is_best, save_dir, model_name, neptune): if is_best: best_filename = os.path.join(save_dir, "best_ckpt.pth") shutil.copyfile(filename, best_filename) - neptune['best_checkpoint'].track_files(best_filename) - - - + if neptune: + neptune['best_checkpoint'].track_files(best_filename) From da1f121df589092323de3d73f24bf229d25291ac Mon Sep 17 00:00:00 2001 From: Jakub Date: Thu, 12 May 2022 11:52:24 +0200 Subject: [PATCH 08/30] add more metrics --- tools/train.py | 2 +- yolox/core/trainer.py | 6 ++++-- yolox/exp/base_exp.py | 8 ++++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tools/train.py b/tools/train.py index a7d40028d..fb2b35717 100644 --- a/tools/train.py +++ b/tools/train.py @@ -136,7 +136,7 @@ def main(exp, args): with open(args.config_filepath, "r") as f: config = yaml.safe_load(f) exp.add_params_from_config(config, use_neptune=True) - exp.neptune['config'].track_files(args.config_filepath) + exp.neptune['config_file'].track_files(args.config_filepath) num_gpu = get_num_devices() if args.devices is None else args.devices assert num_gpu <= get_num_devices() diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index 3416a85fe..6048101ee 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -114,6 +114,7 @@ def train_one_iter(self): self.ema_model.update(self.model) lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1) + self.neptune['lr'].log(lr) for param_group in self.optimizer.param_groups: param_group["lr"] = lr @@ -243,7 +244,8 @@ def after_iter(self): loss_str = ", ".join( ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()] ) - + for loss_name, loss_value in loss_meter.items(): + self.neptune[loss_name].log(loss_value.latest()) time_meter = self.meter.get_filtered_meter("time") time_str = ", ".join( ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()] @@ -327,7 +329,7 @@ def evaluate_and_save_model(self): update_best_ckpt = ap50_95 > self.best_ap self.best_ap = max(self.best_ap, ap50_95) - + self.neptune['best_ap'].log(self.best_ap) if self.rank == 0: if self.args.logger == "tensorboard": self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1) diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py index 127c5de07..e76400b17 100644 --- a/yolox/exp/base_exp.py +++ b/yolox/exp/base_exp.py @@ -13,7 +13,7 @@ from torch.nn import Module from yolox.utils import LRScheduler - +from paths import DATASETS_PATH class BaseExp(metaclass=ABCMeta): """Basic class for any experiment.""" @@ -80,7 +80,11 @@ def merge(self, cfg_list): def add_params_from_config(self, config: dict, use_neptune: bool = True): for key, value in config.items(): - setattr(self, key, value) + if key == "dataset_version": + value = DATASETS_PATH / key + setattr("dataset_dir", value) + else: + setattr(self, key, value) if use_neptune and self.neptune: self.neptune[f"config/{key}"].log(value) From 9415f617129caf6863a03d1ebd77e679a24c112d Mon Sep 17 00:00:00 2001 From: Jakub Date: Thu, 12 May 2022 12:20:42 +0200 Subject: [PATCH 09/30] bug fix --- yolox/core/trainer.py | 6 +++--- yolox/exp/base_exp.py | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index 6048101ee..94b18024e 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -114,7 +114,7 @@ def train_one_iter(self): self.ema_model.update(self.model) lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1) - self.neptune['lr'].log(lr) + self.neptune['config/lr'].log(lr) for param_group in self.optimizer.param_groups: param_group["lr"] = lr @@ -245,7 +245,7 @@ def after_iter(self): ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()] ) for loss_name, loss_value in loss_meter.items(): - self.neptune[loss_name].log(loss_value.latest()) + self.neptune[f"loss/{loss_name}"].log(loss_value.latest) time_meter = self.meter.get_filtered_meter("time") time_str = ", ".join( ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()] @@ -329,7 +329,7 @@ def evaluate_and_save_model(self): update_best_ckpt = ap50_95 > self.best_ap self.best_ap = max(self.best_ap, ap50_95) - self.neptune['best_ap'].log(self.best_ap) + self.neptune['metrics/best_ap'].log(self.best_ap) if self.rank == 0: if self.args.logger == "tensorboard": self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1) diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py index e76400b17..479cd3414 100644 --- a/yolox/exp/base_exp.py +++ b/yolox/exp/base_exp.py @@ -81,8 +81,7 @@ def merge(self, cfg_list): def add_params_from_config(self, config: dict, use_neptune: bool = True): for key, value in config.items(): if key == "dataset_version": - value = DATASETS_PATH / key - setattr("dataset_dir", value) + setattr(self, "dataset_dir", DATASETS_PATH / value) else: setattr(self, key, value) if use_neptune and self.neptune: From 9e7c5b84179715bcc838c5fc530d115cc5957077 Mon Sep 17 00:00:00 2001 From: Dawid Stachowiak Date: Thu, 19 May 2022 16:01:14 +0200 Subject: [PATCH 10/30] fixes in training and neptune logging --- tools/train.py | 7 ++++++- yolox/exp/base_exp.py | 17 +++++++++++++---- yolox/utils/checkpoint.py | 2 +- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/tools/train.py b/tools/train.py index fb2b35717..cfbf19954 100644 --- a/tools/train.py +++ b/tools/train.py @@ -127,6 +127,11 @@ def main(exp, args): if __name__ == "__main__": args = make_parser().parse_args() exp = get_exp(args.exp_file, args.name) + + #TODO: Add neptune logging with multidevice training. Logging now works only + # on 1 gpu device training, not working with multiprocessing. + exp.set_neptune_logging(True) + exp.merge(args.opts) if not args.experiment_name: @@ -136,7 +141,7 @@ def main(exp, args): with open(args.config_filepath, "r") as f: config = yaml.safe_load(f) exp.add_params_from_config(config, use_neptune=True) - exp.neptune['config_file'].track_files(args.config_filepath) + exp.neptune['config_file'].upload(args.config_filepath) num_gpu = get_num_devices() if args.devices is None else args.devices assert num_gpu <= get_num_devices() diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py index 479cd3414..1aabd0d51 100644 --- a/yolox/exp/base_exp.py +++ b/yolox/exp/base_exp.py @@ -23,10 +23,8 @@ def __init__(self): self.output_dir = "./YOLOX_outputs" self.print_interval = 100 self.eval_interval = 10 - self.neptune = neptune.init( - project="jakub.pingielski/b-yond", - api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI2NTlkYzZmZC1kZTY5LTQ2NjMtODFkZC04YmY4NTNmYTkwMTIifQ==", - ) + self.neptune = None + @abstractmethod def get_model(self) -> Module: pass @@ -64,6 +62,17 @@ def __repr__(self): ] return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid") + def set_neptune_logging(self, state): + if state: + self.neptune = neptune.init( + project="jakub.pingielski/b-yond", + api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI2NTlkYzZmZC1kZTY5LTQ2NjMtODFkZC04YmY4NTNmYTkwMTIifQ==", + ) + else: + if self.neptune is not None: + self.neptune.stop() + self.neptune = None + def merge(self, cfg_list): assert len(cfg_list) % 2 == 0 for k, v in zip(cfg_list[0::2], cfg_list[1::2]): diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py index 50da303a6..e7d732bef 100644 --- a/yolox/utils/checkpoint.py +++ b/yolox/utils/checkpoint.py @@ -43,4 +43,4 @@ def save_checkpoint(state, is_best, save_dir, model_name="", neptune=None): best_filename = os.path.join(save_dir, "best_ckpt.pth") shutil.copyfile(filename, best_filename) if neptune: - neptune['best_checkpoint'].track_files(best_filename) + neptune['best_checkpoint'].upload(best_filename) From d26a6ca5600528e1eaad05781e6789cee56d4c81 Mon Sep 17 00:00:00 2001 From: Aditya-Bobade Date: Thu, 19 May 2022 21:38:01 +0530 Subject: [PATCH 11/30] validation loss logging --- yolox/core/trainer.py | 38 ++++++++++++++++++++++++++++ yolox/exp/yolox_base.py | 56 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index 94b18024e..f01c85aac 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -152,10 +152,18 @@ def before_train(self): no_aug=self.no_aug, cache_img=self.args.cache, ) + self.val_loader = self.exp.get_val_loader( + batch_size=self.args.batch_size, + is_distributed=self.is_distributed, + no_aug=False, + cache_img=self.args.cache, + ) logger.info("init prefetcher, this might take one minute or less...") self.prefetcher = DataPrefetcher(self.train_loader) + self.val_prefetcher = DataPrefetcher(self.val_loader) # max_iter means iters per epoch self.max_iter = len(self.train_loader) + self.max_val_iter = len(self.val_loader) self.lr_scheduler = self.exp.get_lr_scheduler( self.exp.basic_lr_per_img * self.args.batch_size, self.max_iter @@ -315,6 +323,9 @@ def resume_train(self, model): return model def evaluate_and_save_model(self): + # calculate loss + self.calculate_eval_loss() + if self.use_model_ema: evalmodel = self.ema_model.ema else: @@ -368,3 +379,30 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False): if self.args.logger == "wandb": self.wandb_logger.save_checkpoint(self.file_name, ckpt_name, update_best_ckpt) + def calculate_eval_loss(self): + for iter in range(self.max_val_iter): + inps, targets = self.val_prefetcher.next() + inps = inps.to(self.data_type) + targets = targets.to(self.data_type) + targets.requires_grad = False + inps, targets = self.exp.preprocess(inps, targets, self.input_size) + + with torch.cuda.amp.autocast(enabled=self.amp_training): + outputs = self.model(inps, targets) + + loss = { + "total_loss": outputs["total_loss"], + "iou_loss": outputs["iou_loss"], + "l1_loss": outputs["l1_loss"], + "conf_loss": outputs["conf_loss"], + "cls_loss": outputs["cls_loss"] + } + progress_str = "epoch: {}/{}, iter: {}/{},".format( + self.epoch + 1, self.max_epoch, iter + 1, self.max_val_iter + ) + + for loss_name, loss_value in loss.items(): + progress_str += " {}: {:.1f}".format(loss_name, loss_value) + self.neptune[f"loss/val/{loss_name}"].log(loss_value) + + logger.info("Validation:{}".format(progress_str)) diff --git a/yolox/exp/yolox_base.py b/yolox/exp/yolox_base.py index 5d0496f49..097a9c5e3 100644 --- a/yolox/exp/yolox_base.py +++ b/yolox/exp/yolox_base.py @@ -201,6 +201,62 @@ def get_data_loader( return train_loader + def get_val_loader( + self, batch_size, is_distributed, no_aug=False, cache_img=False, testdev=False + ): + from yolox.data import ( + COCODataset, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + worker_init_reset_seed, + ) + from yolox.utils import ( + wait_for_the_master, + get_local_rank, + ) + + local_rank = get_local_rank() + + with wait_for_the_master(local_rank): + dataset = COCODataset( + data_dir=self.data_dir, + json_file=self.val_ann if not testdev else self.test_ann, + img_size=self.input_size, + preproc=TrainTransform( + max_labels=50, + flip_prob=0.0, + hsv_prob=0.0), + cache=cache_img, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + + # Make sure each process has different random seed, especially for 'fork' method. + # Check https://github.com/pytorch/pytorch/issues/63311 for more details. + dataloader_kwargs["worker_init_fn"] = worker_init_reset_seed + + val_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return val_loader + def random_resize(self, data_loader, epoch, rank, is_distributed): tensor = torch.LongTensor(2).cuda() From e53d2bc5343490fc4d1e76a3421972fe10ee1c44 Mon Sep 17 00:00:00 2001 From: Aditya-Bobade Date: Fri, 20 May 2022 14:28:16 +0530 Subject: [PATCH 12/30] flag for validation loss logging --- yolox/core/trainer.py | 25 ++++++++++++++++--------- yolox/exp/yolox_base.py | 2 ++ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index f01c85aac..3dedda515 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -55,6 +55,9 @@ def __init__(self, exp, args): self.input_size = exp.input_size self.best_ap = 0 + # validation loss + self.calc_validation_loss = exp.calc_val_loss + # metric record self.meter = MeterBuffer(window_size=exp.print_interval) self.file_name = os.path.join(exp.output_dir, args.experiment_name) @@ -152,18 +155,21 @@ def before_train(self): no_aug=self.no_aug, cache_img=self.args.cache, ) - self.val_loader = self.exp.get_val_loader( - batch_size=self.args.batch_size, - is_distributed=self.is_distributed, - no_aug=False, - cache_img=self.args.cache, - ) + if self.calc_validation_loss: + self.val_loader = self.exp.get_val_loader( + batch_size=self.args.batch_size, + is_distributed=self.is_distributed, + no_aug=False, + cache_img=self.args.cache, + ) logger.info("init prefetcher, this might take one minute or less...") self.prefetcher = DataPrefetcher(self.train_loader) - self.val_prefetcher = DataPrefetcher(self.val_loader) + if self.calc_validation_loss: + self.val_prefetcher = DataPrefetcher(self.val_loader) # max_iter means iters per epoch self.max_iter = len(self.train_loader) - self.max_val_iter = len(self.val_loader) + if self.calc_validation_loss: + self.max_val_iter = len(self.val_loader) self.lr_scheduler = self.exp.get_lr_scheduler( self.exp.basic_lr_per_img * self.args.batch_size, self.max_iter @@ -324,7 +330,8 @@ def resume_train(self, model): def evaluate_and_save_model(self): # calculate loss - self.calculate_eval_loss() + if self.calc_validation_loss: + self.calculate_eval_loss() if self.use_model_ema: evalmodel = self.ema_model.ema diff --git a/yolox/exp/yolox_base.py b/yolox/exp/yolox_base.py index 097a9c5e3..51d9337c4 100644 --- a/yolox/exp/yolox_base.py +++ b/yolox/exp/yolox_base.py @@ -81,6 +81,8 @@ def __init__(self): self.no_aug_epochs = 15 # apply EMA during training self.ema = True + # calculate validation loss + self.calc_val_loss = False # weight decay of optimizer self.weight_decay = 5e-4 From da181cfbfdd58ef7398d8f1d63e7df4280b649e8 Mon Sep 17 00:00:00 2001 From: Aditya-Bobade Date: Fri, 20 May 2022 17:00:17 +0530 Subject: [PATCH 13/30] average validation loss logging --- yolox/core/trainer.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index 3dedda515..4d400e66f 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -5,6 +5,7 @@ import datetime import os import time +import numpy as np from loguru import logger import torch @@ -387,6 +388,13 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False): self.wandb_logger.save_checkpoint(self.file_name, ckpt_name, update_best_ckpt) def calculate_eval_loss(self): + loss = { + "total_loss": [], + "iou_loss": [], + "l1_loss": [], + "conf_loss": [], + "cls_loss": [] + } for iter in range(self.max_val_iter): inps, targets = self.val_prefetcher.next() inps = inps.to(self.data_type) @@ -397,19 +405,18 @@ def calculate_eval_loss(self): with torch.cuda.amp.autocast(enabled=self.amp_training): outputs = self.model(inps, targets) - loss = { - "total_loss": outputs["total_loss"], - "iou_loss": outputs["iou_loss"], - "l1_loss": outputs["l1_loss"], - "conf_loss": outputs["conf_loss"], - "cls_loss": outputs["cls_loss"] - } - progress_str = "epoch: {}/{}, iter: {}/{},".format( - self.epoch + 1, self.max_epoch, iter + 1, self.max_val_iter - ) + loss["total_loss"].append(outputs["total_loss"]) + loss["iou_loss"].append(outputs["iou_loss"]) + loss["l1_loss"].append(outputs["l1_loss"]) + loss["conf_loss"].append(outputs["conf_loss"]) + loss["cls_loss"].append(outputs["cls_loss"]) + + progress_str = "epoch: {}/{},".format( + self.epoch + 1, self.max_epoch + ) - for loss_name, loss_value in loss.items(): - progress_str += " {}: {:.1f}".format(loss_name, loss_value) - self.neptune[f"loss/val/{loss_name}"].log(loss_value) + for loss_name, loss_value in loss.items(): + progress_str += " {}: {:.1f}".format(loss_name, np.nanmean(loss_value)) + self.neptune[f"loss/val/{loss_name}"].log(np.nanmean(loss_value)) - logger.info("Validation:{}".format(progress_str)) + logger.info("Validation:{}".format(progress_str)) From 47eccabd586bfea33d893a50e07a24fb08bcc38e Mon Sep 17 00:00:00 2001 From: Aditya-Bobade Date: Fri, 20 May 2022 19:58:48 +0530 Subject: [PATCH 14/30] remove average validation loss logging --- yolox/core/trainer.py | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index 4d400e66f..215e47c5d 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -5,7 +5,6 @@ import datetime import os import time -import numpy as np from loguru import logger import torch @@ -388,35 +387,25 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False): self.wandb_logger.save_checkpoint(self.file_name, ckpt_name, update_best_ckpt) def calculate_eval_loss(self): - loss = { - "total_loss": [], - "iou_loss": [], - "l1_loss": [], - "conf_loss": [], - "cls_loss": [] - } for iter in range(self.max_val_iter): inps, targets = self.val_prefetcher.next() inps = inps.to(self.data_type) targets = targets.to(self.data_type) targets.requires_grad = False inps, targets = self.exp.preprocess(inps, targets, self.input_size) - with torch.cuda.amp.autocast(enabled=self.amp_training): outputs = self.model(inps, targets) - - loss["total_loss"].append(outputs["total_loss"]) - loss["iou_loss"].append(outputs["iou_loss"]) - loss["l1_loss"].append(outputs["l1_loss"]) - loss["conf_loss"].append(outputs["conf_loss"]) - loss["cls_loss"].append(outputs["cls_loss"]) - - progress_str = "epoch: {}/{},".format( - self.epoch + 1, self.max_epoch - ) - - for loss_name, loss_value in loss.items(): - progress_str += " {}: {:.1f}".format(loss_name, np.nanmean(loss_value)) - self.neptune[f"loss/val/{loss_name}"].log(np.nanmean(loss_value)) - - logger.info("Validation:{}".format(progress_str)) + loss = { + "total_loss": outputs["total_loss"], + "iou_loss": outputs["iou_loss"], + "l1_loss": outputs["l1_loss"], + "conf_loss": outputs["conf_loss"], + "cls_loss": outputs["cls_loss"] + } + progress_str = "epoch: {}/{}, iter: {}/{},".format( + self.epoch + 1, self.max_epoch, iter + 1, self.max_val_iter + ) + for loss_name, loss_value in loss.items(): + progress_str += " {}: {:.1f},".format(loss_name, loss_value) + self.neptune[f"loss/val/{loss_name}"].log(loss_value) + logger.info("Validation:{}".format(progress_str)) From 1c31df47418a12f29ec18fb3783380200ae16ef8 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 May 2022 11:10:55 +0000 Subject: [PATCH 15/30] mosaic_prob !=1 bug fix --- yolox/data/datasets/mosaicdetection.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yolox/data/datasets/mosaicdetection.py b/yolox/data/datasets/mosaicdetection.py index 708babed5..25a0b0625 100644 --- a/yolox/data/datasets/mosaicdetection.py +++ b/yolox/data/datasets/mosaicdetection.py @@ -6,6 +6,7 @@ import cv2 import numpy as np +import torch from yolox.utils import adjust_box_anns, get_local_rank @@ -151,6 +152,7 @@ def __getitem__(self, idx): # img_info and img_id are not used for training. # They are also hard to be specified on a mosaic image. # ----------------------------------------------------------------- + img_id = torch.tensor(np.array(img_id), dtype=torch.long) return mix_img, padded_labels, img_info, img_id else: From c8fcf89f078706b0c6550b37b7cfbbbb9ae114ca Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 31 May 2022 14:10:51 +0000 Subject: [PATCH 16/30] adding copy paste augmentations --- yolox/data/data_augment.py | 39 ++++++++++++++++++++++++++ yolox/data/datasets/mosaicdetection.py | 23 +++++++++++++-- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/yolox/data/data_augment.py b/yolox/data/data_augment.py index 21cd7b56d..587c3b215 100644 --- a/yolox/data/data_augment.py +++ b/yolox/data/data_augment.py @@ -157,6 +157,45 @@ def preproc(img, input_size, swap=(2, 0, 1)): padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) return padded_img, r +def copy_paste(img, paste_img, labels, paste_labels, prob=0.5, obj_proc=0.5): + img_h, img_w = img.shape[:2] + paste_labels = paste_labels.astype(int) + objects_to_paste = paste_labels[random.sample( + range(0, len(paste_labels) - 1), int(len(paste_labels) * obj_proc) + )] + if len(objects_to_paste) == 0: + return img, labels + cropped_objects = { + idx: paste_img[object[1]:object[3], object[0]:object[2]] + for idx, object in enumerate(objects_to_paste) + } + #50% chance to flip the object + for idx, obj in cropped_objects.items(): + if random.random() > 0.5: + cropped_objects[idx] = obj[:,::-1] + new_coords = { + idx: ( + random.randint(0, img_w - (object[2] - object[0])), + random.randint(0, img_h - (object[3] - object[1])) + ) + for idx, object in enumerate(objects_to_paste) + } + new_labels = [] + for idx, coords in new_coords.items(): + new_labels.append(np.array([ + coords[0], + coords[1], + coords[0] + objects_to_paste[idx][2]- objects_to_paste[idx][0], + coords[1] + objects_to_paste[idx][3]- objects_to_paste[idx][1], + objects_to_paste[idx][4] + ])) + for idx, object in enumerate(new_labels): + img[object[1]:object[3], object[0]:object[2]] = cropped_objects[idx] + labels = np.append(labels, new_labels, 0) + return img, labels + + + class TrainTransform: def __init__(self, max_labels=50, flip_prob=0.5, hsv_prob=1.0): diff --git a/yolox/data/datasets/mosaicdetection.py b/yolox/data/datasets/mosaicdetection.py index 25a0b0625..014ec4d3d 100644 --- a/yolox/data/datasets/mosaicdetection.py +++ b/yolox/data/datasets/mosaicdetection.py @@ -9,7 +9,7 @@ import torch from yolox.utils import adjust_box_anns, get_local_rank - +from yolox.data.data_augment import copy_paste from ..data_augment import random_affine from .datasets_wrapper import Dataset @@ -42,7 +42,8 @@ def __init__( self, dataset, img_size, mosaic=True, preproc=None, degrees=10.0, translate=0.1, mosaic_scale=(0.5, 1.5), mixup_scale=(0.5, 1.5), shear=2.0, enable_mixup=True, - mosaic_prob=1.0, mixup_prob=1.0, *args + mosaic_prob=1.0, mixup_prob=1.0, copy_paste_prob=0.5, + copy_paste_obj_proc=0.5, *args ): """ @@ -65,6 +66,8 @@ def __init__( self.degrees = degrees self.translate = translate self.scale = mosaic_scale + self.copy_paste_prob = copy_paste_prob + self.copy_paste_prob = copy_paste_obj_proc self.shear = shear self.mixup_scale = mixup_scale self.enable_mosaic = mosaic @@ -92,6 +95,14 @@ def __getitem__(self, idx): for i_mosaic, index in enumerate(indices): img, _labels, _, img_id = self._dataset.pull_item(index) + + if self.copy_paste_prob is not None and self.copy_paste_prob!=0.0: + random_idx = index + while random_idx==index: + random_idx = random.randint(0, len(self._dataset.annotations)-1) + paste_img, paste_label, _, _ = self._dataset.pull_item(random_idx) + img, _labels = copy_paste(img, paste_img, _labels, paste_label, self.copy_paste_prob) + h0, w0 = img.shape[:2] # orig hw scale = min(1. * input_h / h0, 1. * input_w / w0) img = cv2.resize( @@ -158,6 +169,14 @@ def __getitem__(self, idx): else: self._dataset._input_dim = self.input_dim img, label, img_info, img_id = self._dataset.pull_item(idx) + + if self.copy_paste_prob is not None and self.copy_paste_prob!=0.0: + random_idx = idx + while random_idx==idx: + random_idx = random.randint(0, len(self._dataset.annotations)-1) + paste_img, paste_label, _, _ = self._dataset.pull_item(random_idx) + img, label = copy_paste(img, paste_img, label, paste_label, self.copy_paste_prob) + img, label = self.preproc(img, label, self.input_dim) return img, label, img_info, img_id From 3343d073834d06ef60d3ad45b5faceb7c7f870ae Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 31 May 2022 14:48:10 +0000 Subject: [PATCH 17/30] refactor --- yolox/data/data_augment.py | 66 ++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/yolox/data/data_augment.py b/yolox/data/data_augment.py index 587c3b215..5952026d7 100644 --- a/yolox/data/data_augment.py +++ b/yolox/data/data_augment.py @@ -158,43 +158,39 @@ def preproc(img, input_size, swap=(2, 0, 1)): return padded_img, r def copy_paste(img, paste_img, labels, paste_labels, prob=0.5, obj_proc=0.5): - img_h, img_w = img.shape[:2] - paste_labels = paste_labels.astype(int) - objects_to_paste = paste_labels[random.sample( - range(0, len(paste_labels) - 1), int(len(paste_labels) * obj_proc) - )] - if len(objects_to_paste) == 0: - return img, labels - cropped_objects = { - idx: paste_img[object[1]:object[3], object[0]:object[2]] - for idx, object in enumerate(objects_to_paste) - } - #50% chance to flip the object - for idx, obj in cropped_objects.items(): - if random.random() > 0.5: - cropped_objects[idx] = obj[:,::-1] - new_coords = { - idx: ( - random.randint(0, img_w - (object[2] - object[0])), - random.randint(0, img_h - (object[3] - object[1])) - ) - for idx, object in enumerate(objects_to_paste) - } - new_labels = [] - for idx, coords in new_coords.items(): - new_labels.append(np.array([ - coords[0], - coords[1], - coords[0] + objects_to_paste[idx][2]- objects_to_paste[idx][0], - coords[1] + objects_to_paste[idx][3]- objects_to_paste[idx][1], - objects_to_paste[idx][4] - ])) - for idx, object in enumerate(new_labels): - img[object[1]:object[3], object[0]:object[2]] = cropped_objects[idx] - labels = np.append(labels, new_labels, 0) + if random.random() > prob: + img_h, img_w = img.shape[:2] + paste_labels = paste_labels.astype(int) + objects_to_paste = paste_labels[random.sample( + range(0, len(paste_labels) - 1), int(len(paste_labels) * obj_proc) + )] + if len(objects_to_paste) == 0: + return img, labels + cropped_objects = [ + paste_img[object[1]:object[3], object[0]:object[2]] + for object in objects_to_paste + ] + #50% chance to flip the object + for idx, obj in enumerate(cropped_objects): + if random.random() > 0.5: + cropped_objects[idx] = obj[:,::-1] + new_labels = [] + for idx, object in enumerate(objects_to_paste): + new_x = random.randint(0, img_w - (object[2] - object[0])) + new_y = random.randint(0, img_h - (object[3] - object[1])) + new_labels.append(np.array([ + new_x, + new_y, + new_x + (object[2] - object[0]), + new_y + (object[3] - object[1]), + object[4] + ])) + for idx, object in enumerate(new_labels): + img[object[1]:object[3], object[0]:object[2]] = cropped_objects[idx] + labels = np.append(labels, new_labels, 0) + breakpoint() return img, labels - class TrainTransform: From 4ab2e63f96c03f8872e7093b0ea4966fa824a141 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 1 Jun 2022 09:29:17 +0000 Subject: [PATCH 18/30] improving speed --- yolox/data/data_augment.py | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/yolox/data/data_augment.py b/yolox/data/data_augment.py index 5952026d7..805ef876a 100644 --- a/yolox/data/data_augment.py +++ b/yolox/data/data_augment.py @@ -166,29 +166,20 @@ def copy_paste(img, paste_img, labels, paste_labels, prob=0.5, obj_proc=0.5): )] if len(objects_to_paste) == 0: return img, labels - cropped_objects = [ - paste_img[object[1]:object[3], object[0]:object[2]] - for object in objects_to_paste - ] - #50% chance to flip the object - for idx, obj in enumerate(cropped_objects): - if random.random() > 0.5: - cropped_objects[idx] = obj[:,::-1] new_labels = [] - for idx, object in enumerate(objects_to_paste): - new_x = random.randint(0, img_w - (object[2] - object[0])) - new_y = random.randint(0, img_h - (object[3] - object[1])) + for obj in objects_to_paste: + cropped_obj = paste_img[obj[1]:obj[3], obj[0]:obj[2]] + if random.random() > 0.5: + cropped_obj = cropped_obj[:,::-1] + new_x_min = random.randint(0, img_w - (obj[2] - obj[0])) + new_y_min = random.randint(0, img_h - (obj[3] - obj[1])) + new_x_max = new_x_min + (obj[2] - obj[0]) + new_y_max = new_y_min + (obj[3] - obj[1]) new_labels.append(np.array([ - new_x, - new_y, - new_x + (object[2] - object[0]), - new_y + (object[3] - object[1]), - object[4] - ])) - for idx, object in enumerate(new_labels): - img[object[1]:object[3], object[0]:object[2]] = cropped_objects[idx] + new_x_min, new_y_min, new_x_max, new_y_max, obj[4] + ])) + img[new_y_min:new_y_max, new_x_min:new_x_max] = cropped_obj labels = np.append(labels, new_labels, 0) - breakpoint() return img, labels From 1a8b0dced1d64ebb2d5fe0d09f9413535d162709 Mon Sep 17 00:00:00 2001 From: Dawid Stachowiak Date: Fri, 3 Jun 2022 14:20:41 +0200 Subject: [PATCH 19/30] added postprocessing --- yolox/utils/boxes.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/yolox/utils/boxes.py b/yolox/utils/boxes.py index dbe10d957..f56bf0679 100644 --- a/yolox/utils/boxes.py +++ b/yolox/utils/boxes.py @@ -44,11 +44,17 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn if not image_pred.size(0): continue # Get score and class with highest confidence - class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True) + class_confs = image_pred[:, 5: 5 + num_classes] + top_confs, top_classes = torch.topk(class_confs, num_classes, 1, sorted=True) + class_conf = top_confs[:,0].unsqueeze(1) + class_pred = top_classes[:,0].unsqueeze(1) - conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() + conf_mask = (image_pred[:, 4] * top_confs[:,0].squeeze() >= conf_thre).squeeze() # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) + detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) + + top_classes = top_classes[conf_mask] detections = detections[conf_mask] if not detections.size(0): continue @@ -67,15 +73,36 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn nms_thre, ) + top_classes = top_classes[nms_out_index] detections = detections[nms_out_index] + detections = preprocess_double_class_instances(detections, top_classes) + if output[i] is None: output[i] = detections else: output[i] = torch.cat((output[i], detections)) + return output +def preprocess_double_class_instances(detections, top_classes): + used_class = [] + sorted_dets = sorted(enumerate(detections), key=lambda x:x[1][-2], reverse=True) + for idx, det in sorted_dets: + class_id = int(det[-1]) + if class_id not in used_class: + used_class.append(int(det[-1])) + continue + else: + idx_counter = 0 + while class_id in used_class: + idx_counter += 1 + class_id = int(top_classes[idx][idx_counter]) + detections[idx][-1] = float(class_id) + return detections + + def bboxes_iou(bboxes_a, bboxes_b, xyxy=True): if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4: raise IndexError From d7a1ad197c427a08dce48fbabd7cec4ab901e2c5 Mon Sep 17 00:00:00 2001 From: Dawid Stachowiak Date: Wed, 15 Jun 2022 13:51:11 +0200 Subject: [PATCH 20/30] changed integration from neptune to mlflow --- tools/train.py | 36 +++++++++++++++++++----------------- yolox/core/trainer.py | 16 ++++++++-------- yolox/exp/base_exp.py | 21 +++++---------------- yolox/utils/checkpoint.py | 8 ++++---- 4 files changed, 36 insertions(+), 45 deletions(-) diff --git a/tools/train.py b/tools/train.py index cfbf19954..953eb66d6 100644 --- a/tools/train.py +++ b/tools/train.py @@ -5,6 +5,7 @@ import argparse import random import warnings +import mlflow from loguru import logger import yaml @@ -104,7 +105,7 @@ def make_parser(): @logger.catch -def main(exp, args): +def main(exp, run, args): if exp.seed is not None: random.seed(exp.seed) torch.manual_seed(exp.seed) @@ -120,17 +121,15 @@ def main(exp, args): configure_omp() cudnn.benchmark = True - trainer = Trainer(exp, args) + trainer = Trainer(exp, run, args) trainer.train() if __name__ == "__main__": args = make_parser().parse_args() exp = get_exp(args.exp_file, args.name) - - #TODO: Add neptune logging with multidevice training. Logging now works only - # on 1 gpu device training, not working with multiprocessing. - exp.set_neptune_logging(True) + mlflow.set_tracking_uri('http://127.0.0.1:5000') + run = mlflow.start_run() exp.merge(args.opts) @@ -140,18 +139,21 @@ def main(exp, args): if args.config_filepath is not None: with open(args.config_filepath, "r") as f: config = yaml.safe_load(f) - exp.add_params_from_config(config, use_neptune=True) - exp.neptune['config_file'].upload(args.config_filepath) num_gpu = get_num_devices() if args.devices is None else args.devices assert num_gpu <= get_num_devices() dist_url = "auto" if args.dist_url is None else args.dist_url - launch( - main, - num_gpu, - args.num_machines, - args.machine_rank, - backend=args.dist_backend, - dist_url=dist_url, - args=(exp, args), - ) + with run: + if args.config_filepath is not None: + mlflow.log_artifact(args.config_filepath, 'config_file') + exp.run = run + exp.add_params_from_config(config, use_mlflow=True) + launch( + main, + num_gpu, + args.num_machines, + args.machine_rank, + backend=args.dist_backend, + dist_url=dist_url, + args=(exp, run, args), + ) diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index 215e47c5d..2535429ef 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -5,6 +5,7 @@ import datetime import os import time +import mlflow from loguru import logger import torch @@ -33,12 +34,12 @@ class Trainer: - def __init__(self, exp, args): + def __init__(self, exp, run, args): # init function only defines some basic attr, other attrs like model, optimizer are built in # before_train methods. self.exp = exp self.args = args - self.neptune = self.exp.neptune + self.run = run # training related attr self.max_epoch = exp.max_epoch self.amp_training = args.fp16 @@ -117,7 +118,7 @@ def train_one_iter(self): self.ema_model.update(self.model) lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1) - self.neptune['config/lr'].log(lr) + mlflow.log_metric("lr", lr) for param_group in self.optimizer.param_groups: param_group["lr"] = lr @@ -259,7 +260,7 @@ def after_iter(self): ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()] ) for loss_name, loss_value in loss_meter.items(): - self.neptune[f"loss/{loss_name}"].log(loss_value.latest) + mlflow.log_metric(f"loss/{loss_name}", loss_value.latest) time_meter = self.meter.get_filtered_meter("time") time_str = ", ".join( ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()] @@ -347,7 +348,7 @@ def evaluate_and_save_model(self): update_best_ckpt = ap50_95 > self.best_ap self.best_ap = max(self.best_ap, ap50_95) - self.neptune['metrics/best_ap'].log(self.best_ap) + mlflow.log_metric(f"metrics/best_ap", self.best_ap) if self.rank == 0: if self.args.logger == "tensorboard": self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1) @@ -380,9 +381,8 @@ def save_ckpt(self, ckpt_name, update_best_ckpt=False): update_best_ckpt, self.file_name, ckpt_name, - self.neptune, + self.run, ) - if self.args.logger == "wandb": self.wandb_logger.save_checkpoint(self.file_name, ckpt_name, update_best_ckpt) @@ -407,5 +407,5 @@ def calculate_eval_loss(self): ) for loss_name, loss_value in loss.items(): progress_str += " {}: {:.1f},".format(loss_name, loss_value) - self.neptune[f"loss/val/{loss_name}"].log(loss_value) + mlflow.log_metric(f"loss/val/{loss_name}", loss_value) logger.info("Validation:{}".format(progress_str)) diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py index 1aabd0d51..789a41590 100644 --- a/yolox/exp/base_exp.py +++ b/yolox/exp/base_exp.py @@ -6,9 +6,9 @@ import pprint from abc import ABCMeta, abstractmethod from typing import Dict +import mlflow from tabulate import tabulate -import neptune.new as neptune import torch from torch.nn import Module @@ -23,7 +23,7 @@ def __init__(self): self.output_dir = "./YOLOX_outputs" self.print_interval = 100 self.eval_interval = 10 - self.neptune = None + self.run = None @abstractmethod def get_model(self) -> Module: @@ -62,17 +62,6 @@ def __repr__(self): ] return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid") - def set_neptune_logging(self, state): - if state: - self.neptune = neptune.init( - project="jakub.pingielski/b-yond", - api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI2NTlkYzZmZC1kZTY5LTQ2NjMtODFkZC04YmY4NTNmYTkwMTIifQ==", - ) - else: - if self.neptune is not None: - self.neptune.stop() - self.neptune = None - def merge(self, cfg_list): assert len(cfg_list) % 2 == 0 for k, v in zip(cfg_list[0::2], cfg_list[1::2]): @@ -87,13 +76,13 @@ def merge(self, cfg_list): v = ast.literal_eval(v) setattr(self, k, v) - def add_params_from_config(self, config: dict, use_neptune: bool = True): + def add_params_from_config(self, config: dict, use_mlflow: bool = False): for key, value in config.items(): if key == "dataset_version": setattr(self, "dataset_dir", DATASETS_PATH / value) else: setattr(self, key, value) - if use_neptune and self.neptune: - self.neptune[f"config/{key}"].log(value) + if use_mlflow and self.run: + mlflow.log_param(key, value) diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py index e7d732bef..8a8997ad4 100644 --- a/yolox/utils/checkpoint.py +++ b/yolox/utils/checkpoint.py @@ -4,7 +4,7 @@ import os import shutil from loguru import logger -import neptune.new as neptune +import mlflow import torch @@ -34,7 +34,7 @@ def load_ckpt(model, ckpt): return model -def save_checkpoint(state, is_best, save_dir, model_name="", neptune=None): +def save_checkpoint(state, is_best, save_dir, model_name="", run=None): if not os.path.exists(save_dir): os.makedirs(save_dir) filename = os.path.join(save_dir, model_name + "_ckpt.pth") @@ -42,5 +42,5 @@ def save_checkpoint(state, is_best, save_dir, model_name="", neptune=None): if is_best: best_filename = os.path.join(save_dir, "best_ckpt.pth") shutil.copyfile(filename, best_filename) - if neptune: - neptune['best_checkpoint'].upload(best_filename) + if run: + mlflow.log_artifact(best_filename, 'best_checkpoint') From b2f55a006dde3f8efecb11a60ae002f72fea68df Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 23 Jun 2022 10:35:24 +0000 Subject: [PATCH 21/30] added exp connection --- tools/train.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/train.py b/tools/train.py index 953eb66d6..7f19a79b6 100644 --- a/tools/train.py +++ b/tools/train.py @@ -128,7 +128,8 @@ def main(exp, run, args): if __name__ == "__main__": args = make_parser().parse_args() exp = get_exp(args.exp_file, args.name) - mlflow.set_tracking_uri('http://127.0.0.1:5000') + mlflow.set_tracking_uri('http://localhost:5000') + mlflow.set_experiment('phoenix-suns-vz-ar') run = mlflow.start_run() exp.merge(args.opts) From 49b7d4b14e68026a836c9c275e761b41e1296412 Mon Sep 17 00:00:00 2001 From: Dawid Stachowiak Date: Thu, 30 Jun 2022 12:15:49 +0200 Subject: [PATCH 22/30] parametrized mlflow connection --- tools/train.py | 22 +++++++++++++++++----- yolox/core/trainer.py | 12 ++++++++---- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/tools/train.py b/tools/train.py index 953eb66d6..577633411 100644 --- a/tools/train.py +++ b/tools/train.py @@ -101,6 +101,13 @@ def make_parser(): default=None, nargs=argparse.REMAINDER, ) + parser.add_argument( + "-ml", + "--mlflow-url", + type=str, + help="MLFlow instance url for logging metrics and files.", + default=None + ) return parser @@ -128,8 +135,9 @@ def main(exp, run, args): if __name__ == "__main__": args = make_parser().parse_args() exp = get_exp(args.exp_file, args.name) - mlflow.set_tracking_uri('http://127.0.0.1:5000') - run = mlflow.start_run() + if args.mlflow_url is not None: + mlflow.set_tracking_uri(args.mlflow_url) + run = mlflow.start_run() exp.merge(args.opts) @@ -145,9 +153,13 @@ def main(exp, run, args): dist_url = "auto" if args.dist_url is None else args.dist_url with run: if args.config_filepath is not None: - mlflow.log_artifact(args.config_filepath, 'config_file') - exp.run = run - exp.add_params_from_config(config, use_mlflow=True) + run = None + if args.mlflow_url is not None: + mlflow.log_artifact(args.config_filepath, 'config_file') + exp.run = run + exp.add_params_from_config(config, use_mlflow=True) + else: + exp.add_params_from_config(config) launch( main, num_gpu, diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index 2535429ef..c4da37e4e 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -118,7 +118,8 @@ def train_one_iter(self): self.ema_model.update(self.model) lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1) - mlflow.log_metric("lr", lr) + if self.run is not None: + mlflow.log_metric("lr", lr) for param_group in self.optimizer.param_groups: param_group["lr"] = lr @@ -260,7 +261,8 @@ def after_iter(self): ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()] ) for loss_name, loss_value in loss_meter.items(): - mlflow.log_metric(f"loss/{loss_name}", loss_value.latest) + if self.run is not None: + mlflow.log_metric(f"loss/{loss_name}", loss_value.latest) time_meter = self.meter.get_filtered_meter("time") time_str = ", ".join( ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()] @@ -348,7 +350,8 @@ def evaluate_and_save_model(self): update_best_ckpt = ap50_95 > self.best_ap self.best_ap = max(self.best_ap, ap50_95) - mlflow.log_metric(f"metrics/best_ap", self.best_ap) + if self.run is not None: + mlflow.log_metric(f"metrics/best_ap", self.best_ap) if self.rank == 0: if self.args.logger == "tensorboard": self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1) @@ -407,5 +410,6 @@ def calculate_eval_loss(self): ) for loss_name, loss_value in loss.items(): progress_str += " {}: {:.1f},".format(loss_name, loss_value) - mlflow.log_metric(f"loss/val/{loss_name}", loss_value) + if self.run is not None: + mlflow.log_metric(f"loss/val/{loss_name}", loss_value) logger.info("Validation:{}".format(progress_str)) From dd91165f6d636270fb048f22d58c29cc2505b799 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 30 Jun 2022 13:19:29 +0000 Subject: [PATCH 23/30] added configurable exp name --- tools/train.py | 56 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/tools/train.py b/tools/train.py index 577633411..c573258ba 100644 --- a/tools/train.py +++ b/tools/train.py @@ -103,11 +103,18 @@ def make_parser(): ) parser.add_argument( "-ml", - "--mlflow-url", + "--mlflow_url", type=str, help="MLFlow instance url for logging metrics and files.", default=None ) + parser.add_argument( + "-mlex", + "--mlflow_experiment_name", + type=str, + help="Experiment name to log metrics and files", + default=None + ) return parser @@ -135,9 +142,11 @@ def main(exp, run, args): if __name__ == "__main__": args = make_parser().parse_args() exp = get_exp(args.exp_file, args.name) - if args.mlflow_url is not None: + run = None + if args.mlflow_url is not None and args.mlflow_experiment_name is not None: mlflow.set_tracking_uri(args.mlflow_url) - run = mlflow.start_run() + experiment = mlflow.get_experiment_by_name(args.mlflow_experiment_name) + run = mlflow.start_run(experiment_id=experiment.experiment_id) exp.merge(args.opts) @@ -151,21 +160,30 @@ def main(exp, run, args): assert num_gpu <= get_num_devices() dist_url = "auto" if args.dist_url is None else args.dist_url - with run: + if run is not None: + with run: + if args.config_filepath is not None: + mlflow.log_artifact(args.config_filepath, 'config_file') + exp.run = run + exp.add_params_from_config(config, use_mlflow=True) + launch( + main, + num_gpu, + args.num_machines, + args.machine_rank, + backend=args.dist_backend, + dist_url=dist_url, + args=(exp, run, args), + ) + else: if args.config_filepath is not None: - run = None - if args.mlflow_url is not None: - mlflow.log_artifact(args.config_filepath, 'config_file') - exp.run = run - exp.add_params_from_config(config, use_mlflow=True) - else: - exp.add_params_from_config(config) + exp.add_params_from_config(config) launch( - main, - num_gpu, - args.num_machines, - args.machine_rank, - backend=args.dist_backend, - dist_url=dist_url, - args=(exp, run, args), - ) + main, + num_gpu, + args.num_machines, + args.machine_rank, + backend=args.dist_backend, + dist_url=dist_url, + args=(exp, run, args), + ) From 58e19c001cfdb90a17f9b0aedf2ced1152518f91 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 4 Jul 2022 13:50:06 +0000 Subject: [PATCH 24/30] changes for classes by config refactor --- yolox/core/trainer.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index c4da37e4e..a05564385 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -261,8 +261,16 @@ def after_iter(self): ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()] ) for loss_name, loss_value in loss_meter.items(): +<<<<<<< Updated upstream +<<<<<<< Updated upstream if self.run is not None: mlflow.log_metric(f"loss/{loss_name}", loss_value.latest) +======= + self.neptune[loss_name].log(loss_value.latest) +>>>>>>> Stashed changes +======= + self.neptune[loss_name].log(loss_value.latest) +>>>>>>> Stashed changes time_meter = self.meter.get_filtered_meter("time") time_str = ", ".join( ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()] From 01279e1919c28f88a141a00170253459616717f1 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 4 Jul 2022 14:01:06 +0000 Subject: [PATCH 25/30] repair bug --- yolox/core/trainer.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py index a05564385..c4da37e4e 100644 --- a/yolox/core/trainer.py +++ b/yolox/core/trainer.py @@ -261,16 +261,8 @@ def after_iter(self): ["{}: {:.1f}".format(k, v.latest) for k, v in loss_meter.items()] ) for loss_name, loss_value in loss_meter.items(): -<<<<<<< Updated upstream -<<<<<<< Updated upstream if self.run is not None: mlflow.log_metric(f"loss/{loss_name}", loss_value.latest) -======= - self.neptune[loss_name].log(loss_value.latest) ->>>>>>> Stashed changes -======= - self.neptune[loss_name].log(loss_value.latest) ->>>>>>> Stashed changes time_meter = self.meter.get_filtered_meter("time") time_str = ", ".join( ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()] From b9315019666831992772e2fe52ff9ae4fdc80f0f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 5 Jul 2022 12:54:01 +0000 Subject: [PATCH 26/30] fixes regarding training --- tools/train.py | 6 +++--- yolox/exp/base_exp.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/train.py b/tools/train.py index c573258ba..6de7d7155 100644 --- a/tools/train.py +++ b/tools/train.py @@ -163,9 +163,9 @@ def main(exp, run, args): if run is not None: with run: if args.config_filepath is not None: - mlflow.log_artifact(args.config_filepath, 'config_file') - exp.run = run - exp.add_params_from_config(config, use_mlflow=True) + mlflow.log_artifact(args.config_filepath, 'config_file') + exp.run = run + exp.add_params_from_config(config, use_mlflow=True) launch( main, num_gpu, diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py index 789a41590..833045b45 100644 --- a/yolox/exp/base_exp.py +++ b/yolox/exp/base_exp.py @@ -82,7 +82,7 @@ def add_params_from_config(self, config: dict, use_mlflow: bool = False): setattr(self, "dataset_dir", DATASETS_PATH / value) else: setattr(self, key, value) - if use_mlflow and self.run: + if use_mlflow and self.run and key != "classes_mapping": mlflow.log_param(key, value) From be92108a25dedf1abbc3a378cb7241711f52053f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 5 Jul 2022 13:22:25 +0000 Subject: [PATCH 27/30] fix errors in true divide --- yolox/evaluators/voc_eval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yolox/evaluators/voc_eval.py b/yolox/evaluators/voc_eval.py index a9e85d3fc..35a13ee86 100644 --- a/yolox/evaluators/voc_eval.py +++ b/yolox/evaluators/voc_eval.py @@ -10,6 +10,7 @@ import xml.etree.ElementTree as ET import numpy as np +np.seterr(invalid='ignore') def parse_rec(filename): From e7e9a23bbaee35ba8df57e03ac82256b891283f2 Mon Sep 17 00:00:00 2001 From: Aditya-Bobade Date: Tue, 5 Jul 2022 20:49:05 +0530 Subject: [PATCH 28/30] install specific library version --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 05a655866..712c0c5c0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ # TODO: Update with exact module version numpy -torch>=1.7 +torch==1.11.0 opencv_python loguru scikit-image tqdm -torchvision +torchvision==0.12.0 Pillow thop ninja From 7c346adc819bbf35e351d1635e48e159a731c831 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 6 Jul 2022 13:05:10 +0000 Subject: [PATCH 29/30] fixing returning only one instance of class --- yolox/utils/boxes.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/yolox/utils/boxes.py b/yolox/utils/boxes.py index f56bf0679..30920f310 100644 --- a/yolox/utils/boxes.py +++ b/yolox/utils/boxes.py @@ -46,6 +46,7 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn # Get score and class with highest confidence class_confs = image_pred[:, 5: 5 + num_classes] top_confs, top_classes = torch.topk(class_confs, num_classes, 1, sorted=True) + class_conf = top_confs[:,0].unsqueeze(1) class_pred = top_classes[:,0].unsqueeze(1) @@ -54,7 +55,7 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) - top_classes = top_classes[conf_mask] + top_classes = class_pred[conf_mask] detections = detections[conf_mask] if not detections.size(0): continue @@ -75,7 +76,7 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn top_classes = top_classes[nms_out_index] detections = detections[nms_out_index] - detections = preprocess_double_class_instances(detections, top_classes) + detections = process_double_class_instances(detections, top_classes) if output[i] is None: output[i] = detections @@ -86,21 +87,19 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn return output -def preprocess_double_class_instances(detections, top_classes): +def process_double_class_instances(detections, top_classes): used_class = [] + processed_detections = [] + unique_classes = top_classes.unique() sorted_dets = sorted(enumerate(detections), key=lambda x:x[1][-2], reverse=True) for idx, det in sorted_dets: class_id = int(det[-1]) if class_id not in used_class: used_class.append(int(det[-1])) - continue - else: - idx_counter = 0 - while class_id in used_class: - idx_counter += 1 - class_id = int(top_classes[idx][idx_counter]) - detections[idx][-1] = float(class_id) - return detections + processed_detections.append(detections[idx]) + if len(used_class) == len(unique_classes): + break + return torch.stack(processed_detections) def bboxes_iou(bboxes_a, bboxes_b, xyxy=True): From 2d8bce3b49be83ee1dfa6909c1e017b4e6ca58f9 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 19 Jul 2022 11:43:21 +0000 Subject: [PATCH 30/30] reverts changeds regardin postprocess (moved to end of pipeline) --- yolox/utils/boxes.py | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/yolox/utils/boxes.py b/yolox/utils/boxes.py index 30920f310..17d6c9eec 100644 --- a/yolox/utils/boxes.py +++ b/yolox/utils/boxes.py @@ -44,18 +44,11 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn if not image_pred.size(0): continue # Get score and class with highest confidence - class_confs = image_pred[:, 5: 5 + num_classes] - top_confs, top_classes = torch.topk(class_confs, num_classes, 1, sorted=True) + class_conf, class_pred = torch.max(image_pred[:, 5: 5 + num_classes], 1, keepdim=True) - class_conf = top_confs[:,0].unsqueeze(1) - class_pred = top_classes[:,0].unsqueeze(1) - - conf_mask = (image_pred[:, 4] * top_confs[:,0].squeeze() >= conf_thre).squeeze() + conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) - detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) - - top_classes = class_pred[conf_mask] detections = detections[conf_mask] if not detections.size(0): continue @@ -74,32 +67,13 @@ def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45, class_agn nms_thre, ) - top_classes = top_classes[nms_out_index] detections = detections[nms_out_index] - detections = process_double_class_instances(detections, top_classes) - if output[i] is None: output[i] = detections else: output[i] = torch.cat((output[i], detections)) - return output - - -def process_double_class_instances(detections, top_classes): - used_class = [] - processed_detections = [] - unique_classes = top_classes.unique() - sorted_dets = sorted(enumerate(detections), key=lambda x:x[1][-2], reverse=True) - for idx, det in sorted_dets: - class_id = int(det[-1]) - if class_id not in used_class: - used_class.append(int(det[-1])) - processed_detections.append(detections[idx]) - if len(used_class) == len(unique_classes): - break - return torch.stack(processed_detections) def bboxes_iou(bboxes_a, bboxes_b, xyxy=True):