Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Benchmark] Support Video MCQ with TaskMeAnything-v1-video-random as an example #359

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
4 changes: 3 additions & 1 deletion run.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def main():

args = parse_args()
assert len(args.data), '--data should be a list of data files'

if args.retry is not None:
for k, v in supported_VLM.items():
if hasattr(v, 'keywords') and 'retry' in v.keywords:
Expand Down Expand Up @@ -89,6 +88,9 @@ def main():
continue

result_file = f'{pred_root}/{model_name}_{dataset_name}.xlsx'
if dataset_name in ['TaskMeAnything_v1_videoqa_random']:
packstr = 'pack' if args.pack else 'nopack'
result_file = f'{pred_root}/{model_name}_{dataset_name}_{args.nframe}frame_{packstr}.xlsx'
if dataset_name in ['MMBench-Video']:
packstr = 'pack' if args.pack else 'nopack'
result_file = f'{pred_root}/{model_name}_{dataset_name}_{args.nframe}frame_{packstr}.xlsx'
Expand Down
3 changes: 2 additions & 1 deletion vlmeval/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .mmbench_video import MMBenchVideo
from .text_mcq import CustomTextMCQDataset, TextMCQDataset
from .videomme import VideoMME
from .video_mcq import VideoMCQDataset
from .utils import *
from ..smp import *

Expand All @@ -23,7 +24,7 @@
]

VIDEO_DATASET = [
MMBenchVideo, VideoMME
MMBenchVideo, VideoMME, VideoMCQDataset
]

TEXT_DATASET = [
Expand Down
38 changes: 17 additions & 21 deletions vlmeval/dataset/image_mcq.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@


class ImageMCQDataset(ImageBaseDataset):

TYPE = 'MCQ'

DATASET_URL = {
Expand All @@ -16,14 +15,14 @@ class ImageMCQDataset(ImageBaseDataset):
'MMBench_DEV_CN': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_DEV_CN.tsv',
'MMBench_TEST_CN': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_TEST_CN.tsv',
'MMBench': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench.tsv', # Internal Only
'MMBench_CN': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_CN.tsv', # Internal Only
'MMBench_CN': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_CN.tsv', # Internal Only
# MMBench v1.1
'MMBench_DEV_EN_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_DEV_EN_V11.tsv',
'MMBench_TEST_EN_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_TEST_EN_V11.tsv',
'MMBench_DEV_CN_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_DEV_CN_V11.tsv',
'MMBench_TEST_CN_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_TEST_CN_V11.tsv',
'MMBench_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_V11.tsv', # Internal Only
'MMBench_CN_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_CN_V11.tsv', # Internal Only
'MMBench_CN_V11': 'https://opencompass.openxlab.space/utils/VLMEval/MMBench_CN_V11.tsv', # Internal Only
# SEEDBench Series
'SEEDBench_IMG': 'https://opencompass.openxlab.space/utils/VLMEval/SEEDBench_IMG.tsv',
'SEEDBench2': 'https://huggingface.co/datasets/VLMEval/SEEDBench2/resolve/main/SEEDBench2.tsv',
Expand Down Expand Up @@ -66,14 +65,14 @@ class ImageMCQDataset(ImageBaseDataset):
'MMBench_DEV_CN': '08b8fc3324a5ed74155350f57be69fbd',
'MMBench_TEST_CN': '7e1239baf0ee4c8b513e19705a0f317e',
'MMBench': '4115aea3383f3dd0083be6a633e0f820', # Internal Only
'MMBench_CN': '2e053ffc90ea598b1feae13c36dc13ee', # Internal Only
'MMBench_CN': '2e053ffc90ea598b1feae13c36dc13ee', # Internal Only
# MMBench v1.1
'MMBench_DEV_EN_V11': '30c05be8f2f347a50be25aa067248184',
'MMBench_TEST_EN_V11': '26f0f15381a21720255091d3e0316ce6',
'MMBench_DEV_CN_V11': '593f9b5f6bea453d870a798b34ae4f37',
'MMBench_TEST_CN_V11': '74bbe4556dac745613c7cbe5ad787050',
'MMBench_V11': 'b9276414f57af1308dcc4d0cd9b42e7c', # Internal Only
'MMBench_CN_V11': '95f6980dd1b4de38e3cbffe0305a3f25', # Internal Only
'MMBench_CN_V11': '95f6980dd1b4de38e3cbffe0305a3f25', # Internal Only
# SEEDBench
'SEEDBench_IMG': '68017231464752261a2526d6ca3a10c0',
'SEEDBench2': '4ec15cf864c4f16274112284f531813e',
Expand Down Expand Up @@ -103,11 +102,10 @@ class ImageMCQDataset(ImageBaseDataset):
'RealWorldQA': '92321028d2bc29040284b6674721e48f',
'MLLMGuard_DS': '975fc0dd7119386e198c37d71e274b3f',
'BLINK': '3b6649b6a662184ea046908e5506260e',
'TaskMeAnything_v1_imageqa_random': '93b7290b447ef947f3b3abae5ad4bc1b'
'TaskMeAnything_v1_imageqa_random': '023fef69e2ca21827afb77c5ec3bc889',
}

def build_prompt(self, line):

if isinstance(line, int):
line = self.data.iloc[line]

Expand All @@ -117,11 +115,7 @@ def build_prompt(self, line):
tgt_path = self.dump_image(line)

question = line['question']
options = {
cand: line[cand]
for cand in string.ascii_uppercase
if cand in line and not pd.isna(line[cand])
}
options = {cand: line[cand] for cand in string.ascii_uppercase if cand in line and not pd.isna(line[cand])}
options_prompt = 'Options:\n'
for key, item in options.items():
options_prompt += f'{key}. {item}\n'
Expand All @@ -145,10 +139,13 @@ def build_prompt(self, line):

def evaluate(self, eval_file, **judge_kwargs):
from .utils.multiple_choice import report_acc, report_acc_MMT, mcq_circular_eval, mcq_vanilla_eval

# assert dataset is not None
dataset_map = {
'MMBench_TEST_EN': 'MMBench', 'MMBench_TEST_EN_V11': 'MMBench_V11',
'MMBench_TEST_CN': 'MMBench_CN', 'MMBench_TEST_CN_V11': 'MMBench_CN_V11'
'MMBench_TEST_EN': 'MMBench',
'MMBench_TEST_EN_V11': 'MMBench_V11',
'MMBench_TEST_CN': 'MMBench_CN',
'MMBench_TEST_CN_V11': 'MMBench_CN_V11',
}
dataset = self.dataset_name
if dataset in dataset_map:
Expand Down Expand Up @@ -193,9 +190,7 @@ def evaluate(self, eval_file, **judge_kwargs):
meta_q_map = {x: y for x, y in zip(meta['index'], meta['question'])}
data_map = {x: y for x, y in zip(data['index'], data['question'])}
for k in data_map:
assert k in meta_q_map, (
f'eval_file should be the same as or a subset of dataset {self.dataset_name}'
)
assert k in meta_q_map, f'eval_file should be the same as or a subset of dataset {self.dataset_name}'

if circular:
data = mcq_circular_eval(model, data, meta, nproc, result_file, self.dataset_name)
Expand All @@ -216,14 +211,15 @@ def evaluate(self, eval_file, **judge_kwargs):
dump(acc, score_file)

if dataset == 'AesBench_VAL':
warnings.warn('Note that AesBench VAL is just a toy version of AesBench TEST. For full results, \
warnings.warn(
'Note that AesBench VAL is just a toy version of AesBench TEST. For full results, \
please evaluate on AesBench TEST. The AesBench TEST dataset is more than 20 times \
larger than the VAL dataset and the leaderboard results are based on AesBench TEST.')
larger than the VAL dataset and the leaderboard results are based on AesBench TEST.'
)
return acc


class MMMUDataset(ImageMCQDataset):

DATASET_URL = {
'MMMU_DEV_VAL': 'https://opencompass.openxlab.space/utils/VLMEval/MMMU_DEV_VAL.tsv',
'MMMU_TEST': 'https://opencompass.openxlab.space/utils/VLMEval/MMMU_TEST.tsv',
Expand Down Expand Up @@ -264,14 +260,14 @@ def build_prompt(self, line):


class CustomMCQDataset(ImageMCQDataset):
    """MCQ dataset whose TSV is read from the LMU data root rather than from a registered URL."""

    def load_data(self, dataset):
        """Load ``<LMUDataRoot>/<dataset>.tsv``.

        When ``file_size(path, 'GB')`` exceeds 1, a sibling ``*_local.tsv``
        file is loaded instead. That sibling is (re)built with ``LOCALIZE``
        if it does not exist yet or if the ``FORCE_LOCAL`` environment
        variable is set to a non-empty value.
        """
        tsv_path = osp.join(LMUDataRoot(), f'{dataset}.tsv')

        oversized = file_size(tsv_path, 'GB') > 1
        if oversized:
            localized = tsv_path.replace('.tsv', '_local.tsv')
            needs_build = not osp.exists(localized) or os.environ.get('FORCE_LOCAL', None)
            if needs_build:
                # Imported lazily, only when a localized copy must be produced.
                from ..tools import LOCALIZE

                LOCALIZE(tsv_path, localized)
            tsv_path = localized

        return load(tsv_path)
15 changes: 15 additions & 0 deletions vlmeval/dataset/utils/video_mcq_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import base64


# video_mcq datasets store mp4 videos as base64-encoded strings.
# Use this helper to convert an mp4 file to base64.
def mp4_to_base64(mp4_path):
    """Read a video file and return its contents as a base64 string.

    Args:
        mp4_path: Path of the file to encode.

    Returns:
        The base64 encoding of the file's bytes, as a UTF-8 ``str``.

    Raises:
        FileNotFoundError: If ``mp4_path`` does not exist.
        OSError: If the file cannot be opened or read.
    """
    # Errors are propagated on purpose: returning a message such as
    # 'The file was not found.' would be handed to the caller as if it were
    # encoded video data and silently end up in the dataset.
    with open(mp4_path, 'rb') as video_file:
        video_data = video_file.read()
    return base64.b64encode(video_data).decode('utf-8')
93 changes: 73 additions & 20 deletions vlmeval/dataset/video_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,68 @@
from ..smp import *


class VideoBaseDataset:
def video_root_map(dataset):
    """Return ``dataset`` unchanged (identity mapping)."""
    mapped = dataset
    return mapped


class VideoBaseDataset:
MODALITY = 'VIDEO'

def __init__(self, dataset='MMBench-Video', pack=False, skip_novideo=True):
    """Initialise the dataset from a prepared video folder or from a TSV file.

    Args:
        dataset: Dataset name. 'MMBench-Video' and 'Video-MME' are set up
            through ``prepare_dataset``; every other name is loaded through
            ``load_data``.
        pack: Stored as ``self.pack``. ``__len__`` counts videos instead of
            rows when it is truthy.
        skip_novideo: On the TSV path, drop rows whose 'video' value is NaN.
    """
    try:
        import decord
    except Exception:
        # Best effort: a missing or broken decord only produces a warning here.
        warnings.warn('Please install decord via `pip install decord`.')

    # Initialisation for the two original video datasets.
    if dataset in ['MMBench-Video', 'Video-MME']:
        self.dataset_name = dataset
        ret = self.prepare_dataset(dataset)
        assert ret is not None
        lmu_root = LMUDataRoot()
        self.frame_root = osp.join(lmu_root, 'images', dataset)
        os.makedirs(self.frame_root, exist_ok=True)
        self.frame_tmpl = 'frame-{}-of-{}.jpg'

        self.data_root = ret['root']
        self.data_file = ret['data_file']
        self.data = load(self.data_file)

        assert 'question' in self.data and 'video' in self.data
        videos = list(set(self.data['video']))
        videos.sort()
        self.videos = videos
        self.pack = pack

    # Initialisation without prepare_dataset, following the image base dataset.
    else:
        lmu_root = LMUDataRoot()
        # You can override this variable to save image files to a different directory
        self.dataset_name = dataset
        self.frame_root = osp.join(lmu_root, 'images', dataset)
        self.frame_tmpl = 'frame-{}-of-{}.jpg'
        # __len__ reads self.pack, so it must exist on this path as well.
        # NOTE(review): self.videos is not populated here; packing on this
        # path needs post_build (or a subclass) to provide it.
        self.pack = pack

        data, data_root = self.load_data(dataset)
        self.data_root = data_root
        self.meta_only = True
        self.skip_novideo = skip_novideo
        if skip_novideo and 'video' in data:
            data = data[~pd.isna(data['video'])]

        # Normalise indices to str first; they are turned back into int
        # below only when every one of them passes istype(x, int).
        data['index'] = [str(x) for x in data['index']]

        if 'video' in data:
            self.meta_only = False

        if 'video_path' in data:
            paths = [toliststr(x) for x in data['video_path']]
            # A single-element list is unwrapped to the bare element.
            data['video_path'] = [x[0] if len(x) == 1 else x for x in paths]

        if np.all([istype(x, int) for x in data['index']]):
            data['index'] = [int(x) for x in data['index']]

        self.data = data
        self.post_build(dataset)

def __len__(self):
return len(self.videos) if self.pack else len(self.data)
Expand All @@ -44,6 +77,26 @@ def __getitem__(self, idx):
assert idx < len(self.data)
return dict(self.data.iloc[idx])

def load_data(self, dataset):
url = self.DATASET_URL[dataset]
file_md5 = self.DATASET_MD5[dataset]
return self.prepare_tsv(url, file_md5)

def prepare_tsv(self, url, file_md5=None):
    """Ensure the TSV behind ``url`` is present under the LMU data root and load it.

    The file name is the last path component of ``url``. The file is
    downloaded when it is absent, or when ``file_md5`` is given and does not
    match the MD5 of the existing file.

    Returns:
        A ``(data, data_root)`` tuple: the result of ``load`` on the TSV
        path and the data root directory.
    """
    data_root = LMUDataRoot()
    os.makedirs(data_root, exist_ok=True)
    data_path = osp.join(data_root, url.split('/')[-1])

    up_to_date = osp.exists(data_path) and (file_md5 is None or md5(data_path) == file_md5)
    if not up_to_date:
        warnings.warn('The dataset tsv is not downloaded')
        download_file(url, data_path)

    return load(data_path), data_root

def post_build(self, dataset):
    """Hook invoked at the end of the TSV initialisation path; does nothing by default."""
    return None

def frame_paths(self, video, num_frames=8):
frame_root = osp.join(self.frame_root, video)
os.makedirs(frame_root, exist_ok=True)
Expand All @@ -68,7 +121,7 @@ def save_video_frames(self, video, num_frames=8):
# Return the list of dataset names supported by this class; subclasses can override this.
@classmethod
def supported_datasets(cls):
return ['MMBench-Video', 'Video-MME']
return ['MMBench-Video', 'Video-MME'] + list(cls.DATASET_URL)

# Given the prediction file, return the evaluation results in the format of a dictionary or pandas dataframe
@abstractmethod
Expand Down
Loading
Loading