Skip to content

Commit

Permalink
Update nuscenes_converter.py
Browse files Browse the repository at this point in the history
  • Loading branch information
kkkcx authored Jul 10, 2023
1 parent 0cdf2c1 commit 6316eab
Showing 1 changed file with 129 additions and 24 deletions.
153 changes: 129 additions & 24 deletions TransFusion/tools/data_converter/nuscenes_converter.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
import mmcv
import numpy as np
# Copyright (c) OpenMMLab. All rights reserved.
import os
from collections import OrderedDict
from os import path as osp
from typing import List, Tuple, Union

import mmcv
import numpy as np
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import view_points
from os import path as osp
from pyquaternion import Quaternion
from shapely.geometry import MultiPoint, box
from typing import List, Tuple, Union

from mmdet3d.core.bbox import points_cam2img
from mmdet3d.datasets import NuScenesDataset

# Detection classes annotated in the nuScenes benchmark.
nus_categories = (
    'car',
    'truck',
    'trailer',
    'bus',
    'construction_vehicle',
    'bicycle',
    'motorcycle',
    'pedestrian',
    'traffic_cone',
    'barrier',
)

# Per-box attribute names; the literal string 'None' marks boxes
# that carry no attribute annotation.
nus_attributes = (
    'cycle.with_rider',
    'cycle.without_rider',
    'pedestrian.moving',
    'pedestrian.standing',
    'pedestrian.sitting_lying_down',
    'vehicle.moving',
    'vehicle.parked',
    'vehicle.stopped',
    'None',
)


def create_nuscenes_infos(root_path,
info_prefix,
Expand All @@ -27,10 +35,10 @@ def create_nuscenes_infos(root_path,
Args:
root_path (str): Path of the data root.
info_prefix (str): Prefix of the info file to be generated.
version (str): Version of the data.
Default: 'v1.0-trainval'
max_sweeps (int): Max number of sweeps.
Default: 10
version (str, optional): Version of the data.
Default: 'v1.0-trainval'.
max_sweeps (int, optional): Max number of sweeps.
Default: 10.
"""
from nuscenes.nuscenes import NuScenes
nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
Expand Down Expand Up @@ -145,9 +153,9 @@ def _fill_trainval_infos(nusc,
nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.
train_scenes (list[str]): Basic information of training scenes.
val_scenes (list[str]): Basic information of validation scenes.
test (bool): Whether use the test mode. In the test mode, no
test (bool, optional): Whether use the test mode. In test mode, no
annotations can be accessed. Default: False.
max_sweeps (int): Max number of sweeps. Default: 10.
max_sweeps (int, optional): Max number of sweeps. Default: 10.
Returns:
tuple[list[dict]]: Information of training set and validation set
Expand All @@ -170,7 +178,9 @@ def _fill_trainval_infos(nusc,
'lidar_path': lidar_path,
'token': sample['token'],
'sweeps': [],
'sweeps_back': [],
'cams': dict(),
'cam_sweeps': [],
'lidar2ego_translation': cs_record['translation'],
'lidar2ego_rotation': cs_record['rotation'],
'ego2global_translation': pose_record['translation'],
Expand Down Expand Up @@ -204,16 +214,48 @@ def _fill_trainval_infos(nusc,

# obtain sweeps for a single key-frame
sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])

back_sweep = []
if not sd_rec['next'] == '':
sweep = obtain_sensor2top(nusc, sd_rec['next'], l2e_t,
l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')
back_sweep.append(sweep)
# else:
# break
info['sweeps_back'] = back_sweep
# print("------here")
# print(info['sweeps_back'])
# cam_rec = nusc.get('sample_data', sample['data']['CAM_FRONT'])

cam_sweep = []

for cam in camera_types:
# print(cam)
camk = []
while len(camk) < max_sweeps:
cam_rec = nusc.get('sample_data', sample['data']['%s'%cam])
if not cam_rec['prev'] == '':
cam_re = nusc.get('sample_data', cam_rec['prev'])
camk.append(cam_re)
else:
break
cam_sweep.append(camk)
info['cam_sweeps'] = cam_sweep

sweeps = []
while len(sweeps) < max_sweeps:
if not sd_rec['prev'] == '':
sweep = obtain_sensor2top(nusc, sd_rec['prev'], l2e_t,
l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')
l2e_r_mat, e2g_t, e2g_r_mat, 'lidar')
sweeps.append(sweep)
sd_rec = nusc.get('sample_data', sd_rec['prev'])
else:
break
info['sweeps'] = sweeps
# print(cam_sweep)
# # print(sd_rec)
# print("-----"*40)

# obtain annotation
if not test:
annotations = [
Expand Down Expand Up @@ -242,8 +284,10 @@ def _fill_trainval_infos(nusc,
if names[i] in NuScenesDataset.NameMapping:
names[i] = NuScenesDataset.NameMapping[names[i]]
names = np.array(names)
# we need to convert rot to SECOND format.
gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1)
# we need to convert box size to
# the format of our lidar coordinate system
# which is x_size, y_size, z_size (corresponding to l, w, h)
gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)
assert len(gt_boxes) == len(
annotations), f'{len(gt_boxes)}, {len(annotations)}'
info['gt_boxes'] = gt_boxes
Expand Down Expand Up @@ -282,7 +326,7 @@ def obtain_sensor2top(nusc,
e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).
e2g_r_mat (np.ndarray): Rotation matrix from ego to global
in shape (3, 3).
sensor_type (str): Sensor to calibrate. Default: 'lidar'.
sensor_type (str, optional): Sensor to calibrate. Default: 'lidar'.
Returns:
sweep (dict): Sweep information after transformation.
Expand Down Expand Up @@ -324,13 +368,15 @@ def obtain_sensor2top(nusc,
return sweep


def export_2d_annotation(root_path, info_path, version):
def export_2d_annotation(root_path, info_path, version, mono3d=True):
"""Export 2d annotation from the info file and raw data.
Args:
root_path (str): Root path of the raw data.
info_path (str): Path of the info file.
version (str): Dataset version.
mono3d (bool, optional): Whether to export mono3d annotation.
Default: True.
"""
# get bbox annotations for camera
camera_types = [
Expand All @@ -356,12 +402,20 @@ def export_2d_annotation(root_path, info_path, version):
coco_infos = get_2d_boxes(
nusc,
cam_info['sample_data_token'],
visibilities=['', '1', '2', '3', '4'])
visibilities=['', '1', '2', '3', '4'],
mono3d=mono3d)
(height, width, _) = mmcv.imread(cam_info['data_path']).shape
coco_2d_dict['images'].append(
dict(
file_name=cam_info['data_path'],
file_name=cam_info['data_path'].split('data/nuscenes/')
[-1],
id=cam_info['sample_data_token'],
token=info['token'],
cam2ego_rotation=cam_info['sensor2ego_rotation'],
cam2ego_translation=cam_info['sensor2ego_translation'],
ego2global_rotation=info['ego2global_rotation'],
ego2global_translation=info['ego2global_translation'],
cam_intrinsic=cam_info['cam_intrinsic'],
width=width,
height=height))
for coco_info in coco_infos:
Expand All @@ -372,16 +426,24 @@ def export_2d_annotation(root_path, info_path, version):
coco_info['id'] = coco_ann_id
coco_2d_dict['annotations'].append(coco_info)
coco_ann_id += 1
mmcv.dump(coco_2d_dict, f'{info_path[:-4]}.coco.json')
if mono3d:
json_prefix = f'{info_path[:-4]}_mono3d'
else:
json_prefix = f'{info_path[:-4]}'
mmcv.dump(coco_2d_dict, f'{json_prefix}.coco.json')


def get_2d_boxes(nusc, sample_data_token: str,
visibilities: List[str]) -> List[OrderedDict]:
def get_2d_boxes(nusc,
sample_data_token: str,
visibilities: List[str],
mono3d=True):
"""Get the 2D annotation records for a given `sample_data_token`.
Args:
sample_data_token: Sample data token belonging to a camera keyframe.
visibilities: Visibility filter.
sample_data_token (str): Sample data token belonging to a camera
keyframe.
visibilities (list[str]): Visibility filter.
mono3d (bool): Whether to get boxes with mono3d annotation.
Return:
list[dict]: List of 2D annotation record that belongs to the input
Expand Down Expand Up @@ -456,6 +518,48 @@ def get_2d_boxes(nusc, sample_data_token: str,
# Generate dictionary record to be included in the .json file.
repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
sample_data_token, sd_rec['filename'])

# If mono3d=True, add 3D annotations in camera coordinates
if mono3d and (repro_rec is not None):
loc = box.center.tolist()

dim = box.wlh
dim[[0, 1, 2]] = dim[[1, 2, 0]] # convert wlh to our lhw
dim = dim.tolist()

rot = box.orientation.yaw_pitch_roll[0]
rot = [-rot] # convert the rot to our cam coordinate

global_velo2d = nusc.box_velocity(box.token)[:2]
global_velo3d = np.array([*global_velo2d, 0.0])
e2g_r_mat = Quaternion(pose_rec['rotation']).rotation_matrix
c2e_r_mat = Quaternion(cs_rec['rotation']).rotation_matrix
cam_velo3d = global_velo3d @ np.linalg.inv(
e2g_r_mat).T @ np.linalg.inv(c2e_r_mat).T
velo = cam_velo3d[0::2].tolist()

repro_rec['bbox_cam3d'] = loc + dim + rot
repro_rec['velo_cam3d'] = velo

center3d = np.array(loc).reshape([1, 3])
center2d = points_cam2img(
center3d, camera_intrinsic, with_depth=True)
repro_rec['center2d'] = center2d.squeeze().tolist()
# normalized center2D + depth
# if samples with depth < 0 will be removed
if repro_rec['center2d'][2] <= 0:
continue

ann_token = nusc.get('sample_annotation',
box.token)['attribute_tokens']
if len(ann_token) == 0:
attr_name = 'None'
else:
attr_name = nusc.get('attribute', ann_token[0])['name']
attr_id = nus_attributes.index(attr_name)
repro_rec['attribute_name'] = attr_name
repro_rec['attribute_id'] = attr_id

repro_recs.append(repro_rec)

return repro_recs
Expand Down Expand Up @@ -496,7 +600,7 @@ def post_process_coords(

def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
sample_data_token: str, filename: str) -> OrderedDict:
"""Generate one 2D annotation record given various informations on top of
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
Args:
Expand All @@ -511,7 +615,7 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
Returns:
dict: A sample 2D annotation record.
- file_name (str): flie name
- file_name (str): file name
- image_id (str): sample data token
- area (float): 2d box area
- category_name (str): category name
Expand Down Expand Up @@ -556,3 +660,4 @@ def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
coco_rec['iscrowd'] = 0

return coco_rec

0 comments on commit 6316eab

Please sign in to comment.