Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Support ActivityNetMeanAR #84

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion mmeval/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import warnings

from .accuracy import Accuracy
from .anet_ar import ActivityNetAR
from .ava_map import AVAMeanAP
from .bleu import BLEU
from .coco_detection import COCODetection
Expand Down Expand Up @@ -42,7 +43,7 @@
'AveragePrecision', 'AVAMeanAP', 'BLEU', 'DOTAMeanAP',
'SumAbsoluteDifferences', 'GradientError', 'MattingMeanSquaredError',
'ConnectivityError', 'ROUGE', 'Perplexity', 'KeypointEndPointError',
'KeypointAUC', 'KeypointNME', 'NaturalImageQualityEvaluator'
'KeypointAUC', 'KeypointNME', 'NaturalImageQualityEvaluator', 'ActivityNetAR'
]

_deprecated_msg = (
Expand Down
295 changes: 295 additions & 0 deletions mmeval/metrics/anet_ar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,295 @@
# Copyright (c) OpenMMLab. All rights reserved.
import logging
import numpy as np
from collections import OrderedDict
from typing import Dict, List, Sequence, Tuple, Union

from mmeval.core.base_metric import BaseMetric

logger = logging.getLogger(__name__)

Comment on lines +9 to +10
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since PR #102, logger has became an argument in BaseMetric, so other metrics no longer need to define logger themselves.


def pairwise_temporal_iou(candidate_segments: np.ndarray,
target_segments: np.ndarray) -> np.ndarray:
"""Compute intersection over union between segments.

Args:
candidate_segments (np.ndarray): 1-dim/2-dim array in format
``[init, end]/[m x 2:=[init, end]]``.
target_segments (np.ndarray): 2-dim array in format
``[n x 2:=[init, end]]``.

Returns:
t_iou (np.ndarray): 1-dim array [n] /
2-dim array [n x m] with IoU ratio.
Comment on lines +23 to +24
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
t_iou (np.ndarray): 1-dim array [n] /
2-dim array [n x m] with IoU ratio.
t_iou (np.ndarray): 1-dim array [n] /
2-dim array [n x m] with IoU ratio.

"""
candidate_segments_ndim = candidate_segments.ndim
if target_segments.ndim != 2:
raise ValueError('Dimension of target segments is incorrect')
if candidate_segments_ndim not in [1, 2]:
raise ValueError('Dimension of candidate segments is incorrect')
Comment on lines +27 to +30
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if target_segments.ndim != 2:
raise ValueError('Dimension of target segments is incorrect')
if candidate_segments_ndim not in [1, 2]:
raise ValueError('Dimension of candidate segments is incorrect')
if target_segments.ndim != 2:
raise ValueError(f'Dimension of target segments is incorrect. Expected 2, got {target_segments.ndim}')
if candidate_segments_ndim not in [1, 2]:
raise ValueError(f'Dimension of candidate segments is incorrect. Expected 1 or 2, got {target_segments.ndim}')


if candidate_segments_ndim == 1:
candidate_segments = candidate_segments[np.newaxis, :]

num_targets = target_segments.shape[0]
num_candidates = candidate_segments.shape[0]
t_iou = np.empty((num_targets, num_candidates), dtype=np.float32)

for i in range(num_candidates):
candidate_segment = candidate_segments[i, :]
tt1 = np.maximum(candidate_segment[0], target_segments[:, 0])
tt2 = np.minimum(candidate_segment[1], target_segments[:, 1])
# Intersection including Non-negative overlap score.
segments_intersection = (tt2 - tt1).clip(0)
# Segment union.
segments_union = ((target_segments[:, 1] - target_segments[:, 0]) +
(candidate_segment[1] - candidate_segment[0]) -
segments_intersection)
# Compute overlap as the ratio of the intersection
# over union of two segments.
t_iou[:, i] = (segments_intersection.astype(float) / segments_union)

if candidate_segments_ndim == 1:
t_iou = np.squeeze(t_iou, axis=1)

return t_iou


def average_recall_at_avg_proposals(ground_truth,
proposals,
total_num_proposals,
max_avg_proposals=None,
temporal_iou_thresholds=np.linspace(
0.5, 0.95, 10)):
"""Computes the average recall given an average number (percentile) of
proposals per video.

Args:
ground_truth (dict): Dict containing the ground truth instances.
proposals (dict): Dict containing the proposal instances.
total_num_proposals (int): Total number of proposals in the
proposal dict.
max_avg_proposals (int, optional): Max number of proposals for one
video. Defaults to None.
temporal_iou_thresholds (np.ndarray): 1D array with temporal_iou
thresholds. Defaults to ``np.linspace(0.5, 0.95, 10)``.
Returns:
tuple([np.ndarray, np.ndarray, np.ndarray, float]):
(recall, average_recall, proposals_per_video, auc)
In recall, ``recall[i,j]`` is recall at i-th temporal_iou threshold
at the j-th average number (percentile) of average number of
proposals per video. The average_recall is recall averaged
over a list of temporal_iou threshold (1D array). This is
equivalent to ``recall.mean(axis=0)``. The ``proposals_per_video``
is the average number of proposals per video. The auc is the area
under ``AR@AN`` curve.
"""

total_num_videos = len(ground_truth)

if not max_avg_proposals:
max_avg_proposals = float(total_num_proposals) / total_num_videos

ratio = (max_avg_proposals * float(total_num_videos) / total_num_proposals)

# For each video, compute temporal_iou scores among the retrieved proposals
score_list = []
total_num_retrieved_proposals = 0
for video_id in ground_truth.keys():
# Get proposals for this video.
proposals_video_id = proposals[video_id]
this_video_proposals = proposals_video_id[:, :2]
# Sort proposals by score.
sort_idx = proposals_video_id[:, 2].argsort()[::-1]
this_video_proposals = this_video_proposals[sort_idx, :].astype(
np.float32)

# Get ground-truth instances associated to this video.
ground_truth_video_id = ground_truth[video_id]
this_video_ground_truth = ground_truth_video_id[:, :2].astype(
np.float32)
if this_video_proposals.shape[0] == 0:
score_list.append(np.zeros((this_video_ground_truth.shape[0], 1)))
continue

if this_video_proposals.ndim != 2:
this_video_proposals = np.expand_dims(this_video_proposals, axis=0)
if this_video_ground_truth.ndim != 2:
this_video_ground_truth = np.expand_dims(
this_video_ground_truth, axis=0)
Comment on lines +116 to +120
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When will these 2 if-statements be True? There has been

this_video_proposals = proposals_video_id[:, :2]

and

this_video_ground_truth = ground_truth_video_id[:, :2].astype(np.float32)

before.


num_retrieved_proposals = np.minimum(
int(this_video_proposals.shape[0] * ratio),
this_video_proposals.shape[0])
total_num_retrieved_proposals += num_retrieved_proposals
this_video_proposals = this_video_proposals[:
num_retrieved_proposals, :]

# Compute temporal_iou scores.
t_iou = pairwise_temporal_iou(this_video_proposals,
this_video_ground_truth)
score_list.append(t_iou)

# Given that the length of the videos is really varied, we
# compute the number of proposals in terms of a ratio of the total
# proposals retrieved, i.e. average recall at a percentage of proposals
# retrieved per video.

# Computes average recall.
pcn_list = np.arange(1, 101) / 100.0 * (
max_avg_proposals * float(total_num_videos) /
total_num_retrieved_proposals)
matches = np.empty((total_num_videos, pcn_list.shape[0]))
positives = np.empty(total_num_videos)
recall = np.empty((temporal_iou_thresholds.shape[0], pcn_list.shape[0]))
# Iterates over each temporal_iou threshold.
for ridx, temporal_iou in enumerate(temporal_iou_thresholds):
# Inspect positives retrieved per video at different
# number of proposals (percentage of the total retrieved).
for i, score in enumerate(score_list):
# Total positives per video.
positives[i] = score.shape[0]
# Find proposals that satisfies minimum temporal_iou threshold.
true_positives_temporal_iou = score >= temporal_iou
# Get number of proposals as a percentage of total retrieved.
pcn_proposals = np.minimum(
(score.shape[1] * pcn_list).astype(np.int32), score.shape[1])

for j, num_retrieved_proposals in enumerate(pcn_proposals):
# Compute the number of matches
# for each percentage of the proposals
matches[i, j] = np.count_nonzero(
(true_positives_temporal_iou[:, :num_retrieved_proposals]
).sum(axis=1))

# Computes recall given the set of matches per video.
recall[ridx, :] = matches.sum(axis=0) / positives.sum()

# Recall is averaged.
avg_recall = recall.mean(axis=0)

# Get the average number of proposals per video.
proposals_per_video = pcn_list * (
float(total_num_retrieved_proposals) / total_num_videos)
# Get AUC
area_under_curve = np.trapz(avg_recall, proposals_per_video)
auc = 100. * float(area_under_curve) / proposals_per_video[-1]
return recall, avg_recall, proposals_per_video, auc


class ActivityNetAR(BaseMetric):
"""ActivityNet evaluation metric. ActivityNet-1.3 dataset: http://activity-
net.org/download.html.

This metric computes AR under different Average Number of proposals (AR =
1, 5, 10, 100) as AR@1, AR@5, AR@10 and AR@100, and calculate the Area
under the AR vs. AN curve (AUC) as metrics.

temporal_iou_thresholds (np.array or List): the list of temporal iOU
thresholds. Defaults to np.linspace(0.5, 0.95, 10).
max_avg_proposals (int): the maximum of Average Number of proposals.
Defaults to 100.
**kwargs: Keyword parameters passed to :class:`BaseMetric`.


Examples:
>>> from mmeval import ActivityNetAR
>>> anet_metric = ActivityNetAR()
>>> predictions = [
>>> [
>>> [53, 68, 0.21],
>>> [38, 110, 0.54],
>>> [65, 128, 0.95],
>>> [69, 93, 0.98],
>>> [99, 147, 0.84],
>>> [28, 96, 0.84],
>>> [18, 92, 0.22],
>>> [40, 66, 0.36],
>>> [14, 29, 0.75],
>>> [67, 105, 0.25],
>>> [ 2, 7, 0.94],
>>> [25, 112, 0.49],
>>> [ 7, 83, 0.9 ],
>>> [75, 159, 0.42],
>>> [99, 176, 0.62],
>>> [89, 186, 0.56],
>>> [50, 200, 0.5 ],
>>> ],
>>> ]

>>> annotations = [
>>> [
>>> [30.02, 180],
>>> ]
>>> ]
>>> anet_metric(predictions, annotations)
OrderedDict([('auc', 54.2), ('AR@1', 0.0), ('AR@5', 0.0),
('AR@10', 0.2), ('AR@100', 0.6)])
"""

def __init__(self,
temporal_iou_thresholds: Union[np.array, List] = np.linspace(
0.5, 0.95, 10),
max_avg_proposals: int = 100,
**kwargs) -> None:
super().__init__(**kwargs)
self.temporal_iou_thresholds = np.array(temporal_iou_thresholds)
self.max_avg_proposals = max_avg_proposals
self.ground_truth: Dict[str, np.array] = {}

def add(self, predictions: Sequence, annotations: Sequence) -> None: # type: ignore # yapf: disable # noqa: E501
"""Add detection results to the results list.

Args:
predictions (Sequence): A list of predictions. Each prediction
is a list of proposals. A proposal is `[start, end, score]`.

annotations (Sequence): A list of annotations. The length of
`annotations` should equal to the length of `predictions`. Each
annotation is a list of ground truth segments. A segment is
`[start, end]`.
"""
for prediction, annotation in zip(predictions, annotations):
result = {'proposal_list': prediction, 'ground_truth': annotation}
self._results.append(result)

def compute_metric(self, results: Sequence[dict]) -> dict:
"""Compute the metrics from processed results.

Args:
results (list): The processed results of each batch.

Returns:
dict: The computed metrics. The keys are the names of the metrics,
and the values are corresponding results.
"""

eval_results = OrderedDict()
proposal, num_proposals, ground_truth = self._import_proposals(results)

recall, _, _, auc = average_recall_at_avg_proposals(
ground_truth,
proposal,
num_proposals,
max_avg_proposals=self.max_avg_proposals,
temporal_iou_thresholds=self.temporal_iou_thresholds)
eval_results['auc'] = auc
eval_results['AR@1'] = np.mean(recall[:, 0])
eval_results['AR@5'] = np.mean(recall[:, 4])
eval_results['AR@10'] = np.mean(recall[:, 9])
eval_results['AR@100'] = np.mean(recall[:, 99])

return eval_results

@staticmethod
def _import_proposals(results: Sequence[dict]) -> Tuple[dict, int, dict]:
"""Read predictions and ground_truths from results."""
all_proposals = {}
all_ground_truths = {}
num_proposals = 0
for idx, result in enumerate(results):
all_ground_truths[idx] = np.array(result['ground_truth'])
all_proposals[idx] = np.array(result['proposal_list'])
num_proposals += all_proposals[idx].shape[0]
return all_proposals, num_proposals, all_ground_truths
55 changes: 55 additions & 0 deletions tests/test_metrics/test_anet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Copyright (c) OpenMMLab. All rights reserved.
from numpy.testing import assert_array_almost_equal

from mmeval.metrics import ActivityNetAR

prediction1 = [
[[53, 68, 0.21], [38, 110, 0.54], [65, 128, 0.95], [69, 93, 0.98],
[99, 147, 0.84], [28, 96, 0.84], [18, 92, 0.22], [40, 66, 0.36],
[14, 29, 0.75], [67, 105, 0.25], [2, 7, 0.94], [25, 112, 0.49],
[7, 83, 0.9], [75, 159, 0.42], [99, 176, 0.62], [89, 186, 0.56],
[50, 200, 0.5]],
]

annotation1 = [[
[30.02, 180],
]]

prediction2 = [[[53, 68, 0.21], [38, 110, 0.54], [65, 128,
0.95], [69, 93, 0.98],
[99, 147, 0.84], [28, 96, 0.84], [4, 26, 0.42], [40, 66, 0.36],
[14, 29, 0.75], [67, 105, 0.25], [2, 7, 0.94], [25, 112, 0.49],
[7, 83, 0.9], [75, 159, 0.42], [99, 176, 0.62],
[89, 186, 0.56], [50, 200, 0.5]],
[[45, 88, 0.31], [34, 83, 0.54], [76, 113,
0.95], [34, 85, 0.88],
[99, 147, 0.84], [28, 96, 0.84], [4, 26, 0.42], [40, 66, 0.36],
[14, 29, 0.75], [67, 105, 0.25], [2, 7, 0.94], [25, 112, 0.49],
[7, 83, 0.9], [75, 159, 0.42], [99, 176, 0.62],
[89, 186, 0.56], [50, 200, 0.5]]]

annotation2 = [[
[38.2, 110.35],
], [[38.2, 110.35], [40, 66]]]


def test_activitynet_ar():
# stateless
anet_metric = ActivityNetAR()
output = anet_metric(prediction1, annotation1)
assert 'auc' in output
assert_array_almost_equal(output['auc'], 54.2)
for k in 1, 5, 10, 100:
assert f'AR@{k}' in output
assert 0 <= output[f'AR@{k}'] <= 1

# statefull
anet_metric = ActivityNetAR()
anet_metric.add(prediction1, annotation1)
anet_metric.add(prediction2, annotation2)
output = anet_metric.compute()
assert 'auc' in output
assert_array_almost_equal(output['auc'], 74.9625)
for k in 1, 5, 10, 100:
assert f'AR@{k}' in output
assert 0 <= output[f'AR@{k}'] <= 1