Add Benchmarks (MovieChat1k and VDC) and VLMs (MovieChat and AuroraCap) #719

Open · wants to merge 11 commits into main
run.py (2 changes: 1 addition & 1 deletion)
@@ -318,7 +318,7 @@ def main():
if args.judge is not None:
judge_kwargs['model'] = args.judge
else:
if dataset.TYPE in ['MCQ', 'Y/N']:
if dataset.TYPE in ['MCQ', 'Y/N'] or listinstr(['moviechat1k'], dataset_name.lower()):
judge_kwargs['model'] = 'chatgpt-0125'
elif listinstr(['MMVet', 'LLaVABench', 'MMBench-Video'], dataset_name):
judge_kwargs['model'] = 'gpt-4-turbo'
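For context, the fallback above keys off a substring match on the dataset name. A minimal sketch of that behaviour, assuming `listinstr` works like the helper in `vlmeval.smp` (re-implemented here purely for illustration; the dataset name and TYPE below are placeholders):

```python
def listinstr(keywords, s):
    # True if any keyword occurs as a substring of s.
    return any(k in s for k in keywords)

# With this change, open-ended MovieChat1k splits also fall back to the
# 'chatgpt-0125' judge even though their TYPE is not 'MCQ' or 'Y/N'.
dataset_name = 'MovieChat1k_global'   # illustrative dataset name
dataset_type = 'Video-VQA'            # illustrative TYPE
if dataset_type in ['MCQ', 'Y/N'] or listinstr(['moviechat1k'], dataset_name.lower()):
    judge_model = 'chatgpt-0125'
else:
    judge_model = 'gpt-4-turbo'
print(judge_model)  # chatgpt-0125
```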
vlmeval/api/hf_chat_model.py (51 changes: 34 additions & 17 deletions)
@@ -10,7 +10,7 @@ def get_gpu_num(model_name):
kws = {
8: ['65b', '70b'],
4: ['30b', '33b', '35b', '40b'],
2: ['13b', '14b', '20b'],
2: ['13b', '14b', '20b', '8b'],
1: ['6b', '7b', 'moss'],
}
for k in [8, 4, 2, 1]:
@@ -26,7 +26,8 @@ def get_gpu_num(model_name):
'THUDM/chatglm2-6b', 'THUDM/chatglm2-6b-32k', 'THUDM/chatglm3-6b', 'THUDM/chatglm3-6b-32k',
'baichuan-inc/Baichuan2-7B-Chat', 'baichuan-inc/Baichuan2-13B-Chat',
'lmsys/vicuna-7b-v1.5', 'lmsys/vicuna-13b-v1.5',
'meta-llama/Llama-2-7b-chat-hf'
'meta-llama/Llama-2-7b-chat-hf',
'meta-llama/Llama-3.1-8B-Instruct'
]
Auto_model = ['chatglm']

@@ -65,15 +66,14 @@ def __init__(self,
**kwargs):

self.logger = get_logger('HFChatModel')
if 'vicuna' in model_path.lower():
if 'vicuna' in model_path.lower() or 'llama' in model_path.lower():
try:
from fastchat.model import get_conversation_template
except Exception as err:
self.logger.critical('Please install fastchat first to use vicuna. ')
raise err

self.explicit_device = kwargs.pop('device', None)

if self.explicit_device is None:
# If CUDA_VISIBLE_DEVICES is not properly set
if 'CUDA_VISIBLE_DEVICES' not in os.environ or os.environ['CUDA_VISIBLE_DEVICES'] == '0,1,2,3,4,5,6,7':
@@ -93,7 +93,6 @@ def __init__(self,
LoadModel = AutoModel
else:
LoadModel = AutoModelForCausalLM

assert osp.exists(model_path) or len(model_path.split('/')) == 2

device = self.explicit_device if self.explicit_device else 'auto'
@@ -105,20 +104,36 @@ def __init__(self,
precision = {'torch_dtype': torch.bfloat16}

self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = LoadModel.from_pretrained(model_path, trust_remote_code=True, device_map='cpu', **precision)
model = model.eval()
cuda_devices = os.environ.get('CUDA_VISIBLE_DEVICES', '0')
if ',' in cuda_devices:
device_ids = [int(x) for x in cuda_devices.split(',')]
device_map = {i: i for i in range(len(device_ids))}
else:
device_map = {'': 0}

if 'llama' in self.model_path.lower():
from lmdeploy import pipeline, GenerationConfig, TurbomindEngineConfig
print(f"Loading model {model_path} with {num_gpu} GPUs")
backend_config = TurbomindEngineConfig(tp=num_gpu)
self.gen_config = GenerationConfig(max_new_tokens=256)
model = pipeline(model_path,
backend_config=backend_config)
else:
model = LoadModel.from_pretrained(model_path, trust_remote_code=True, device_map='cpu', **precision)
model = model.eval()

if device != 'cpu':
model = model.to(f'cuda:{device}' if isinstance(device, int) else 'cuda')
try:
model.generation_config = GenerationConfig.from_pretrained(
model_path, trust_remote_code=True, device_map=device)
except Exception as err:
self.logger.warning(f'{type(err)}: {err}')
if device != 'cpu':
model = model.to(f'cuda:{device}' if isinstance(device, int) else 'cuda')
try:
model.generation_config = GenerationConfig.from_pretrained(
model_path, trust_remote_code=True, device_map=device)
except Exception as err:
self.logger.warning(f'{type(err)}: {err}')

self.context_length = self._get_context_length_robust(model=model, model_path=model_path)

torch.cuda.empty_cache()
self.model = model
self.context_length = self._get_context_length_robust(model=model, model_path=model_path)
self.answer_buffer = 192
self.system_prompt = system_prompt
for k, v in kwargs.items():
@@ -149,7 +164,9 @@ def generate_str(self, input, **kwargs):
outputs[0][len(inputs['input_ids'][0]):],
skip_special_tokens=True,
spaces_between_special_tokens=False)

elif 'llama' in self.model_path.lower():
prompt = [{'role': 'system', 'content': self.system_prompt}, {'role': 'user', 'content': input}]
resp = self.model(prompt, gen_config=self.gen_config).text
else:
params = self.kwargs
params.update(kwargs)
@@ -165,7 +182,6 @@ def length_ok(self, inputs):

def generate_list(self, full_inputs, offset=0, **kwargs):
assert isinstance(full_inputs, list)

inputs = full_inputs[offset:]
if not self.length_ok(inputs):
return self.chat(full_inputs, offset + 1)
@@ -244,3 +260,4 @@ def generate(self, inputs, **kwargs):
return self.generate_str(inputs, **kwargs)
elif isinstance(inputs, list):
return self.generate_list(inputs, **kwargs)

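For reference, a standalone sketch of the new lmdeploy code path in this file, assuming `lmdeploy` is installed; the checkpoint path and tensor-parallel degree below are placeholders rather than values taken from this PR:

```python
from lmdeploy import pipeline, GenerationConfig, TurbomindEngineConfig

model_path = 'meta-llama/Llama-3.1-8B-Instruct'   # placeholder checkpoint
num_gpu = 2                                       # e.g. what get_gpu_num returns for an '8b' model

# Build a TurboMind-backed pipeline and a generation config, mirroring the llama branch above.
backend_config = TurbomindEngineConfig(tp=num_gpu)
gen_config = GenerationConfig(max_new_tokens=256)
pipe = pipeline(model_path, backend_config=backend_config)

# The pipeline accepts OpenAI-style chat messages; the response exposes the decoded text.
messages = [
    {'role': 'system', 'content': 'You are a helpful assistant.'},
    {'role': 'user', 'content': 'Summarise the clip in one sentence.'},
]
print(pipe(messages, gen_config=gen_config).text)
```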
vlmeval/config.py (7 changes: 6 additions & 1 deletion)
@@ -27,6 +27,7 @@
'PLLaVA-7B': partial(PLLaVA, model_path='ermu2001/pllava-7b', dir_root=PLLaVA_ROOT),
'PLLaVA-13B': partial(PLLaVA, model_path='ermu2001/pllava-13b', dir_root=PLLaVA_ROOT),
'PLLaVA-34B': partial(PLLaVA, model_path='ermu2001/pllava-34b', dir_root=PLLaVA_ROOT),
"MovieChat": partial(MovieChat, model_path='Enxin/MovieChat-vicuna', short_memory_length=18, long_memory_length=256)
}

ungrouped = {
@@ -403,6 +404,10 @@
"Aria": partial(Aria, model_path='rhymes-ai/Aria')
}

aurora_series = {
"AuroraCap": partial(AuroraCap, model_path='wchai/AuroraCap-7B-VID-xtuner', token_merge_ratio=0.4)
}

h2ovl_series = {
'h2ovl-mississippi-2b': partial(H2OVLChat, model_path='h2oai/h2ovl-mississippi-2b'),
'h2ovl-mississippi-1b': partial(H2OVLChat, model_path='h2oai/h2ovl-mississippi-800m'),
@@ -422,7 +427,7 @@
cambrian_series, chameleon_series, video_models, ovis_series, vila_series,
mantis_series, mmalaya_series, phi3_series, xgen_mm_series, qwen2vl_series,
slime_series, eagle_series, moondream_series, llama_series, molmo_series,
kosmos_series, points_series, nvlm_series, vintern_series, h2ovl_series, aria_series,
kosmos_series, points_series, nvlm_series, vintern_series, h2ovl_series, aria_series, aurora_series,
smolvlm_series, sail_series, valley_series, vita_series
]

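As a side note, entries in these series dictionaries are zero-argument constructors built with `functools.partial`. A minimal sketch of the registration pattern, using a hypothetical stand-in class rather than the real `AuroraCap` wrapper:

```python
from functools import partial

class AuroraCapStub:
    # Hypothetical stand-in; the real wrapper lives under vlmeval/vlm.
    def __init__(self, model_path, token_merge_ratio=0.4):
        self.model_path = model_path
        self.token_merge_ratio = token_merge_ratio

aurora_series = {
    'AuroraCap': partial(AuroraCapStub,
                         model_path='wchai/AuroraCap-7B-VID-xtuner',
                         token_merge_ratio=0.4),
}

# Instantiation is deferred until the model name is actually requested.
model = aurora_series['AuroraCap']()
print(model.model_path, model.token_merge_ratio)
```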
vlmeval/dataset/__init__.py (5 changes: 3 additions & 2 deletions)
@@ -31,7 +31,8 @@
from .video_concat_dataset import ConcatVideoDataset
from .mmgenbench import MMGenBench
from .cgbench import CGBench_MCQ_Grounding_Mini, CGBench_OpenEnded_Mini, CGBench_MCQ_Grounding, CGBench_OpenEnded

from .moviechat1k import MovieChat1k
from .vdc import VDC
from .miabench import MIABench
from .cmmmu import CMMMU
from .wildvision import WildVision
@@ -138,7 +139,7 @@ def evaluate(self, eval_file, **judge_kwargs):
]

VIDEO_DATASET = [
MMBenchVideo, VideoMME, MVBench, MVBench_MP4, LongVideoBench,
MMBenchVideo, VideoMME, MVBench, MVBench_MP4, LongVideoBench, VDC, MovieChat1k,
MLVU, MLVU_MCQ, MLVU_OpenEnded,
TempCompass, TempCompass_MCQ, TempCompass_Captioning, TempCompass_YorN,
CGBench_MCQ_Grounding_Mini, CGBench_OpenEnded_Mini, CGBench_MCQ_Grounding, CGBench_OpenEnded
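Roughly, classes listed in `VIDEO_DATASET` can be resolved by matching a requested dataset name against what each class reports it supports. A sketch of one way such a dispatch can work, with illustrative stub classes and names (the real `MovieChat1k` and `VDC` implementations live in the newly imported modules above):

```python
class MovieChat1kStub:
    @classmethod
    def supported_datasets(cls):
        # Illustrative names only.
        return ['MovieChat1k_global', 'MovieChat1k_breakpoint']

class VDCStub:
    @classmethod
    def supported_datasets(cls):
        return ['VDC']

VIDEO_DATASET = [MovieChat1kStub, VDCStub]

def resolve_video_dataset(name):
    # Return the first registered class that claims the requested dataset name.
    for cls in VIDEO_DATASET:
        if name in cls.supported_datasets():
            return cls
    raise ValueError(f'Unknown video dataset: {name}')

print(resolve_video_dataset('VDC').__name__)  # VDCStub
```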