Add YoloTiny adapter
SergeyBoRss committed Dec 15, 2024
1 parent 04a416a commit a8b22fa
Showing 4 changed files with 127 additions and 6 deletions.
@@ -340,6 +340,7 @@ ssd300 | - | - | Bounding box: (380,165), (595,425) | Boundin
ssd512 | - | - | Bounding box: (377,163), (595,425) | Bounding box: (380,165), (595,425) |
ssd_mobilenet_v1_fpn_coco | - | - | Bounding boxes: (295, 131), (439, 291),<br> (375, 217), (582, 425),<br> (436, 153), (611, 301) | Bounding boxes: (295, 131), (439, 291),<br> (375, 217), (582, 425),<br> (436, 153), (611, 301) |
ssdlite_mobilenet_v2 | - | - | - | - |
yolo-v3-tiny-tf | - | - | Bounding boxes: (127, 161), (228, 325),<br> (43, 139), (127, 286), <br> (212, 147), (345, 341), <br> (175, 110), (251, 243) | Bounding boxes: (127, 161), (228, 325),<br> (43, 139), (127, 286), <br> (212, 147), (345, 341), <br> (175, 110), (251, 243) |

### Test image #2

@@ -366,6 +367,7 @@ ssd300 | - | - | Bounding box: (68,100), (336,452) | Bounding
ssd512 | - | - | Bounding box: (75,100), (355,445) | Bounding box: (75,100), (355,445)|
ssd_mobilenet_v1_fpn_coco | - | - | Bounding box: (89, 98), (345, 440)| Bounding box: (89, 98), (345, 440)|
ssdlite_mobilenet_v2 | - | - | Bounding box: (47, 59), (206, 272)| Bounding box: (47, 59), (206, 272)|
yolo-v3-tiny-tf | - | - | Bounding box: (39, 36), (324, 452)| Bounding box: (39, 36), (324, 452)|

### Test image #3

@@ -392,6 +394,7 @@ ssd300 | - | - | Bounding box: (80,155), (270,375) | Boundin
ssd512 | - | - | Bounding box: (75,170), (172,370) | Bounding box: (73,170), (173,371) |
ssd_mobilenet_v1_fpn_coco | - | - | Bounding box: (90, 135), (260, 375)| Bounding box: (90, 135), (260, 375)|
ssdlite_mobilenet_v2 | - | - | Bounding boxes: (74, 155), (242, 226), (75, 102), (242, 225)| Bounding boxes: (74, 155), (242, 226), (75, 102), (242, 225)|
yolo-v3-tiny-tf | - | - | Bounding boxes: (134, 105), (288, 319), <br>(127, 280), (299, 330)| Bounding boxes: (134, 105), (288, 319), <br>(127, 280), (299, 330)|

### Test image #4
Data source: [MS COCO][ms_coco]
@@ -424,6 +427,7 @@ pelee-coco |-|-| Bounding box:<br>TV (103, 41), (402, 289)<br>MOUSE (not detected)
retinanet-tf |-|-| Bounding box:<br>TV (104, 40), (390, 298)<br>MOUSE (507, 337), (559, 373)<br>KEYBOARD (231, 331), (497, 455) | Bounding box:<br>TV (104, 40), (390, 298)<br>MOUSE (507, 337), (559, 373)<br>KEYBOARD (231, 331), (497, 455)<br>|
ssd_resnet50_v1_fpn_coco |-|-| Bounding box:<br>TV (113, 40), (396, 305)<br>MOUSE (508, 337), (559, 373)<br>KEYBOARD (223, 340), (499, 461) | Bounding box:<br>TV (113, 40), (396, 305)<br>MOUSE (508, 337), (559, 373)<br>KEYBOARD (223, 340), (499, 461)<br>|
ssdlite_mobilenet_v2 |-|-| Bounding box:<br>TV (45, 23), (182, 181)<br>MOUSE (238, 209), (261, 229)<br>KEYBOARD (108, 212), (235, 287) | Bounding box:<br>TV (45, 23), (182, 181)<br>MOUSE (238, 209), (261, 229)<br>KEYBOARD (108, 212), (235, 287)<br>|
yolo-v3-tiny-tf |-|-| Bounding box:<br>TV (87, 29), (265, 267)<br>MOUSE (330, 292), (362, 332)<br>KEYBOARD (156, 305), (313, 388) | Bounding box:<br>TV (87, 29), (265, 267)<br>MOUSE (330, 292), (362, 332)<br>KEYBOARD (156, 305), (313, 388)<br>|

### Test image #5
Data source: [MS COCO][ms_coco]
@@ -448,6 +452,7 @@ pelee-coco |-|-| Bounding box:<br>PERSON (95, 72), (207, 397)
retinanet-tf |-|-| Bounding box:<br>PERSON (90, 73), (205, 384)<br>HORSE (145, 61), (542, 378) | Bounding box:<br>PERSON (90, 73), (205, 384)<br>HORSE (145, 61), (542, 378)<br>|
ssd_resnet50_v1_fpn_coco |-|-| Bounding box:<br>PERSON (not detected)<br>HORSE (134, 57), (534, 389) | Bounding box:<br>PERSON (not detected)<br>HORSE (134, 57), (534, 389)<br>|
ssdlite_mobilenet_v2 |-|-| Bounding box:<br>PERSON (43, 48), (98, 281)<br>HORSE (57, 42), (251, 271) | Bounding box:<br>PERSON (43, 48), (98, 281)<br>HORSE (57, 42), (251, 271)<br>|
yolo-v3-tiny-tf |-|-| Bounding box:<br>HORSE (74, 44), (352, 382) | Bounding box:<br>HORSE (74, 44), (352, 382)<br>|


### Test image #6
@@ -467,6 +472,7 @@ Bounding box (upper left and bottom right corners):<br>AEROPLANE (131, 21), (24
efficientdet-d0-tf |-|-| Bounding box:<br>AIRPLANE (64, 173), (449, 333)<br>| Bounding box:<br>AIRPLANE (64, 173), (449, 333)<br>|
efficientdet-d1-tf |-|-| Bounding box:<br>AIRPLANE (71, 212), (551, 412)<br>| Bounding box:<br>AIRPLANE (71, 212), (551, 412)<br>|
yolo-v1-tiny-tf |-|-| Bounding box:<br>AEROPLANE (131, 21), (248, 414)<br>| Bounding box:<br>AEROPLANE (131, 21), (248, 414)<br>|
yolo-v3-tiny-tf |-|-| Bounding box:<br>AEROPLANE (-16, 138), (438, 281)<br>| Bounding box:<br>AEROPLANE (-16, 138), (438, 281)<br>|

### Test image #7
Data source: [WIDER FACE Dataset][wider_face_dataset]
2 changes: 1 addition & 1 deletion src/inference/inference_openvino_async_mode.py
@@ -137,7 +137,7 @@ def cli_argument_parser():
'person-detection-action-recognition-teacher', 'driver-action-recognition-encoder',
'reidentification', 'driver-action-recognition-decoder', 'action-recognition-decoder',
'face-detection', 'mask-rcnn', 'yolo_tiny_voc', 'yolo_v2_voc', 'yolo_v2_coco',
'yolo_v2_tiny_coco', 'yolo_v3', 'yolo_v3_tf'],
'yolo_v2_tiny_coco', 'yolo_v3', 'yolo_v3_tf', 'retinanet-tf', 'yolo_v3_tiny'],
default='feedforward',
type=str,
dest='task')
2 changes: 1 addition & 1 deletion src/inference/inference_openvino_sync_mode.py
@@ -122,7 +122,7 @@ def cli_argument_parser():
'action-recognition-encoder', 'driver-action-recognition-encoder', 'reidentification',
'driver-action-recognition-decoder', 'action-recognition-decoder', 'face-detection',
'mask-rcnn', 'yolo_tiny_voc', 'yolo_v2_voc', 'yolo_v2_coco', 'yolo_v2_tiny_coco',
'yolo_v3', 'yolo_v3_tf'],
'yolo_v3', 'yolo_v3_tf', 'retinanet-tf', 'yolo_v3_tiny'],
default='feedforward',
type=str,
dest='task')
123 changes: 119 additions & 4 deletions src/inference/io_adapter.py
@@ -336,6 +336,8 @@ def get_io_adapter(args, io_model_wrapper, transformer):
return MiniFASNetV2TFLiteCppIO(args, io_model_wrapper, transformer)
elif task == 'retinanet-tf':
return RetinaNetDetectionIO(args, io_model_wrapper, transformer)
elif task == 'yolo_v3_tiny':
return YoloV3TinyIO(args, io_model_wrapper, transformer)


class FeedForwardIO(IOAdapter):
@@ -1797,7 +1799,7 @@ def _get_anchors(self):
def _get_shapes(self):
pass

def __non_max_supression(self, predictions, score_threshold, nms_threshold):
def _non_max_supression(self, predictions, score_threshold, nms_threshold):
predictions.sort(key=lambda prediction: prediction[0], reverse=True)
valid_detections = []
while len(predictions) > 0:
@@ -1832,7 +1834,7 @@ def __non_max_supression(self, predictions, score_threshold, nms_threshold):
return valid_detections

@staticmethod
def __print_detections(detections, labels_map, image, scales, orig_shape, batch, log):
def _print_detections(detections, labels_map, image, scales, orig_shape, batch, log):
image = cv2.resize(image, orig_shape)
for detection in detections:
left = int(detection[2][0] * scales['W'])
@@ -1843,8 +1845,8 @@ def __print_detections(detections, labels_map, image, scales, orig_shape, batch,
color = (min(int(class_id / 25 % 5) * 50, 255), min(int(class_id / 5 % 5) * 50, 255),
min(int(class_id % 5) * 50, 255))
log.info('Bounding boxes for image {0} for object {1}'.format(batch, class_id))
log.info('Top left: ({0}, {1})'.format(top, left))
log.info('Bottom right: ({0}, {1})'.format(bottom, right))
log.info('Top left: ({0}, {1})'.format(left, top))
log.info('Bottom right: ({0}, {1})'.format(right, bottom))
label = '<' + labels_map[class_id] + '>'
image = cv2.rectangle(image, (left, top), (right, bottom), color, 3)
label_size, base_line = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.75, 1)
@@ -2055,6 +2057,119 @@ def _get_cell_predictions(self, cx, cy, dx, dy, detection, anchor_box_number, im

return predictions

class YoloV3TinyIO(yolo):
def __init__(self, args, io_model_wrapper, transformer):
super().__init__(args, io_model_wrapper, transformer)
self.load_labels_map('mscoco_names.txt')

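# Standard yolo-v3-tiny anchor priors as (width, height) pairs in input-image
# pixels: the first set feeds the coarse 13x13 output, the second the 26x26 one.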
def _get_anchors(self):
return [
((81, 82), (135, 169), (344, 319)),
((23, 27), (37, 58), (81, 82)),
]

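# Per-scale output layout: (3 anchors, 4 box offsets + objectness + 80 COCO
# classes = 85 attributes, grid size, grid size).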
def _get_shapes(self):
return [
(3, 85, 13, 13),
(3, 85, 26, 26),
]

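# Decode both output grids into candidate detections, prune them with NMS,
# then draw and save the surviving boxes for each image in the batch.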
def process_output(self, result, log):
if self._is_result_invalid(result):
log.warning('Model output is processed only when the number of iterations is 1')
return

anchors = self._get_anchors()
shapes = self._get_shapes()
outputs = [
result.get('conv2d_9/Conv2D/YoloRegion'),
result.get('conv2d_12/Conv2D/YoloRegion'),
]

if outputs[0] is None or outputs[1] is None:
log.warning('Expected output layers not found in the result')
return

input_layer_name = next(iter(self._input))
input_ = self._input[input_layer_name]
ib, h, w, c = input_.shape

b = outputs[0].shape[0]
images = np.empty((b, h, w, c), dtype=input_.dtype)

for i in range(b):
images[i] = input_[i % ib]

for batch in range(b):
image = images[batch].copy()
predictions = []
orig_h, orig_w = self._original_shapes[next(iter(self._original_shapes))][batch % ib]
scales = {'W': orig_w / w, 'H': orig_h / h}

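# Walk every output scale, anchor and grid cell, decoding each cell's raw
# offsets into candidate boxes.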
for output, shape, anchor_set in zip(outputs, shapes, anchors):
num_anchors, num_attributes, grid_size_x, grid_size_y = shape
output = output[batch].reshape(num_anchors, num_attributes, grid_size_x, grid_size_y)

for anchor_idx in range(num_anchors):
for cx in range(grid_size_x):
for cy in range(grid_size_y):
detection = output[anchor_idx, :, cy, cx]
prediction = self._get_cell_predictions(
cx, cy, grid_size_x, grid_size_y,
detection, anchor_idx,
h, w,
anchor_set,
scales
)

if prediction:
predictions.extend(prediction)

valid_detections = self._non_max_supression(predictions, self._threshold, 0.2)

processed_image = self._print_detections(
valid_detections,
self._labels_map,
image,
scales,
(orig_w, orig_h),
batch,
log,
)

out_img = Path(__file__).parent / f'out_detection_{batch + 1}.bmp'
cv2.imwrite(str(out_img), processed_image)
log.info(f"Result image was saved to {out_img}")

def _get_cell_predictions(self, cx, cy, dx, dy, detection, anchor_box_number, image_height, image_width, anchors, scales):
tx, ty, tw, th, box_score = detection[:5]
class_logits = detection[5:]

bbox_center_x = (cx + self._sigmoid(tx)) * (image_width / dx)
bbox_center_y = (cy + self._sigmoid(ty)) * (image_height / dy)

prior_width, prior_height = anchors[anchor_box_number]
bbox_width = np.exp(tw) * prior_width * scales['W']
bbox_height = np.exp(th) * prior_height * scales['H']

box_confidence = self._sigmoid(box_score)

class_probs = self._sigmoid(class_logits)
class_confidences = box_confidence * class_probs

predictions = []
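# Keep every class whose combined confidence clears the hard-coded 0.5
# cut-off; overlapping boxes are pruned later by NMS in process_output.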
for class_id, confidence in enumerate(class_confidences):
if confidence >= 0.5:
bbox = [
float(bbox_center_x - bbox_width / 2),
float(bbox_center_y - bbox_height / 2),
float(bbox_width),
float(bbox_height),
]
predictions.append([confidence, class_id, bbox])

return predictions if predictions else None


class YoloV7(IOAdapter):
def process_output(self, result, log, threshold=0.5):
Expand Down

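For readers who want to sanity-check the decode in `_get_cell_predictions` outside the adapter, here is a minimal self-contained sketch of the same YOLOv3 box parameterization. The offsets, cell indices, and 416x416 input size are illustrative assumptions for this sketch, not values taken from the commit; only the anchor pair comes from `_get_anchors` above.

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

# Illustrative raw outputs for one cell of the coarse 13x13 grid (made-up values).
tx, ty, tw, th, objectness = 0.4, -0.2, 0.3, 0.1, 2.0
cx, cy = 6, 6                # cell indices on the grid
grid = 13                    # 13x13 output scale
input_w = input_h = 416      # assumed yolo-v3-tiny input resolution
prior_w, prior_h = 81, 82    # first anchor of the coarse scale

# Center: sigmoid-squashed offsets inside the cell, mapped to input pixels.
center_x = (cx + sigmoid(tx)) * (input_w / grid)
center_y = (cy + sigmoid(ty)) * (input_h / grid)

# Size: exponentiated offsets scale the anchor priors (already in pixels).
box_w = np.exp(tw) * prior_w
box_h = np.exp(th) * prior_h

# Objectness gates the per-class probabilities, as in the adapter.
box_conf = sigmoid(objectness)

print(f'center=({center_x:.1f}, {center_y:.1f}) '
      f'size=({box_w:.1f}, {box_h:.1f}) conf={box_conf:.2f}')
```

Unlike this sketch, the adapter also multiplies the decoded width and height by `scales['W']` and `scales['H']`, mapping boxes toward the original image resolution before NMS runs.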