-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[DOCS] model performance table data update (#27863)
port: #27862
- Loading branch information
1 parent
4efe53e
commit 5398964
Showing
4 changed files
with
293 additions
and
484 deletions.
There are no files selected for viewing
251 changes: 95 additions & 156 deletions
251
docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,156 +1,95 @@ | ||
Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd tok/sec | ||
opt-125m-gptq,INT4-MIXED,32,965.9,29,7.7,129.87 | ||
opt-125m-gptq,INT4-MIXED,1024,1507.9,113.1,7.8,128.21 | ||
tiny-llama-1.1b-chat,INT4-MIXED,32,1831.8,46.5,16.7,59.88 | ||
tiny-llama-1.1b-chat,INT4-MIXED,1024,1806.3,635,17.8,56.18 | ||
qwen2-0.5b,INT4-MIXED,32,2551.7,61.4,18.3,54.64 | ||
qwen2-0.5b,INT4-MIXED,1024,2976.6,356.1,19.2,52.08 | ||
tiny-llama-1.1b-chat,INT8-CW,32,1987.4,56,21.6,46.30 | ||
tiny-llama-1.1b-chat,INT8-CW,1024,2209.1,772.7,22.6,44.25 | ||
qwen2-0.5b,INT8-CW,32,2484.9,57.3,22.8,43.86 | ||
qwen2-0.5b,INT8-CW,1024,3102.5,407.1,23.9,41.84 | ||
qwen2-1.5b,INT4-MIXED,32,4265.2,71.7,25.5,39.22 | ||
qwen2-1.5b,INT4-MIXED,1024,4884.5,862.4,26.8,37.31 | ||
dolly-v2-3b,INT4-MIXED,32,2401.3,89.6,27.5,36.36 | ||
red-pajama-incite-chat-3b-v1,INT4-MIXED,32,2511.5,78.6,28.2,35.46 | ||
phi-2,INT4-MIXED,32,2279.5,95.7,29.1,34.36 | ||
minicpm-1b-sft,INT4-MIXED,31,2759.9,104.4,30.9,32.36 | ||
phi-2,INT4-MIXED,32,2620.1,100.8,31,32.26 | ||
stable-zephyr-3b-dpo,INT4-MIXED,30,2636.5,86.8,31.7,31.55 | ||
dolly-v2-3b,INT4-MIXED,1024,3137.1,1782.9,32.2,31.06 | ||
red-pajama-incite-chat-3b-v1,INT4-MIXED,1020,3118.5,1831.7,33.3,30.03 | ||
red-pajama-incite-chat-3b-v1,INT4-MIXED,1024,2862.7,1821.1,33.5,29.85 | ||
qwen2-1.5b,INT8-CW,32,4831.2,87,33.8,29.59 | ||
opt-2.7b,INT4-MIXED,31,2898.3,73.2,33.9,29.50 | ||
phi-2,INT4-MIXED,1024,2797.4,1887,34,29.41 | ||
orca-mini-3b,INT4-MIXED,32,2877.8,100.3,35,28.57 | ||
stablelm-3b-4e1t,INT4-MIXED,32,2669.4,94.7,35.3,28.33 | ||
qwen2-1.5b,INT8-CW,1024,5455.8,1047.6,35.3,28.33 | ||
minicpm-1b-sft,INT8-CW,31,3104.1,103.5,35.3,28.33 | ||
phi-2,INT4-MIXED,1024,3039.8,1917.4,35.9,27.86 | ||
stable-zephyr-3b-dpo,INT4-MIXED,946,3411.4,1695,37,27.03 | ||
gemma-2b-it,INT4-MIXED,32,3991.7,116.1,37.9,26.39 | ||
opt-2.7b,INT4-MIXED,937,3617.5,1764.9,38.2,26.18 | ||
phi-3-mini-4k-instruct,INT4-MIXED,31,2935.3,111.6,38.2,26.18 | ||
phi-3-mini-4k-instruct,INT4-MIXED,38,3102.4,134,38.4,26.04 | ||
phi-3-mini-4k-instruct,INT4-MIXED,31,2986.1,114.1,38.9,25.71 | ||
phi-3-mini-4k-instruct,INT4-MIXED,38,2977.4,131.1,39,25.64 | ||
gemma-2b-it,INT4-MIXED,1024,4973.3,1249.2,39.7,25.19 | ||
stablelm-3b-4e1t,INT4-MIXED,1024,3196.9,2045.4,39.9,25.06 | ||
dolly-v2-3b,INT8-CW,32,3490.2,107.4,41.5,24.10 | ||
red-pajama-incite-chat-3b-v1,INT8-CW,32,3457.9,105,42.5,23.53 | ||
opt-2.7b,INT8-CW,31,3686.8,107.5,44.1,22.68 | ||
phi-2,INT8-CW,32,3554.9,116.6,44.1,22.68 | ||
phi-3-mini-4k-instruct,INT4-MIXED,1023,3390.7,2277.1,44.2,22.62 | ||
phi-3-mini-4k-instruct,INT4-MIXED,1061,3643.6,2485,44.4,22.52 | ||
phi-3-mini-4k-instruct,INT4-MIXED,1023,3516.4,2280.9,44.5,22.47 | ||
phi-3-mini-4k-instruct,INT4-MIXED,1061,3537.2,2522.4,44.7,22.37 | ||
orca-mini-3b,INT4-MIXED,1024,3557.3,1898.9,45,22.22 | ||
minicpm-1b-sft,FP16,31,3814.4,97.9,45.4,22.03 | ||
stablelm-3b-4e1t,INT8-CW,32,3486.9,100.5,46.1,21.69 | ||
stable-zephyr-3b-dpo,INT8-CW,30,3516.7,101.9,46.1,21.69 | ||
dolly-v2-3b,INT8-CW,1024,4265.9,2178.6,46.2,21.65 | ||
red-pajama-incite-chat-3b-v1,INT8-CW,1020,3979.1,2219.7,47.2,21.19 | ||
red-pajama-incite-chat-3b-v1,INT8-CW,1024,3975.5,2199.7,47.3,21.14 | ||
opt-2.7b,INT8-CW,937,4358.6,1981.8,48.4,20.66 | ||
phi-2,INT8-CW,1024,4058.1,2280.1,48.9,20.45 | ||
gemma-2b-it,INT8-CW,32,4786.8,119.8,49.4,20.24 | ||
chatglm3-6b,INT4-MIXED,32,4141.5,166.6,49.7,20.12 | ||
stablelm-3b-4e1t,INT8-CW,1024,4054.8,2243.5,50.7,19.72 | ||
stable-zephyr-3b-dpo,INT8-CW,946,4521.8,1816.4,51.3,19.49 | ||
gemma-2b-it,INT8-CW,1024,5810.7,1580,51.3,19.49 | ||
chatglm3-6b,INT4-MIXED,32,4651.4,164.7,51.6,19.38 | ||
chatglm3-6b,INT4-MIXED,1024,4235.1,2818.7,52.3,19.12 | ||
orca-mini-3b,INT8-CW,32,4162,109.2,53.3,18.76 | ||
chatglm3-6b,INT4-MIXED,1024,4783.8,2869,54.4,18.38 | ||
gpt-j-6b,INT4-MIXED,32,4667.3,176.7,56.3,17.76 | ||
chatglm3-6b-gptq,INT4-MIXED,32,5369.4,173.9,58.9,16.98 | ||
llama-2-7b-chat-hf,INT4-MIXED,32,4280,173.2,60.1,16.64 | ||
phi-3-mini-4k-instruct,INT8-CW,31,4585.1,123,60.5,16.53 | ||
phi-3-mini-4k-instruct,INT8-CW,38,4597,152,60.5,16.53 | ||
chatglm2-6b,INT4-MIXED,32,4847.8,158.7,60.6,16.50 | ||
vicuna-7b-v1.5,INT4-MIXED,32,4476.9,178.2,61.2,16.34 | ||
chatglm3-6b-gptq,INT4-MIXED,1024,5217.6,2863.7,61.3,16.31 | ||
mistral-7b-v0.1,INT4-MIXED,31,4413.6,194,61.7,16.21 | ||
qwen2-7b,INT4-MIXED,32,7044.7,184.4,61.7,16.21 | ||
mistral-7b-v0.1,INT4-MIXED,32,4427.6,193.3,61.8,16.18 | ||
orca-mini-3b,INT8-CW,1024,4821.6,2239.1,62,16.13 | ||
codegen25-7b,INT4-MIXED,32,4687.2,176.2,62.7,15.95 | ||
chatglm2-6b,INT4-MIXED,1024,5165.9,3148,63,15.87 | ||
llama-2-7b-gptq,INT4-MIXED,32,4632.8,175.2,63.4,15.77 | ||
stablelm-7b,INT4-MIXED,32,5219.5,206.3,63.4,15.77 | ||
qwen-7b-chat,INT4-MIXED,32,7805.6,193.8,63.6,15.72 | ||
gpt-j-6b,INT4-MIXED,1024,5314.9,3111.8,63.6,15.72 | ||
qwen2-7b,INT4-MIXED,1024,7716.2,3548.3,64.1,15.60 | ||
llama-3-8b,INT4-MIXED,32,4910.9,204.8,64.7,15.46 | ||
mistral-7b-v0.1,INT4-MIXED,1024,4720.8,3667.1,64.8,15.43 | ||
mistral-7b-v0.1,INT4-MIXED,1007,4704.7,3685.4,64.9,15.41 | ||
llama-3.1-8b,INT4-MIXED,31,4850.3,211.5,64.9,15.41 | ||
phi-3-mini-4k-instruct,INT8-CW,1023,5128.6,2815.2,65.7,15.22 | ||
phi-3-mini-4k-instruct,INT8-CW,1061,5155,3407.9,65.9,15.17 | ||
mistral-7b-v0.1,INT4-MIXED,32,4939.3,192,66.5,15.04 | ||
llama-3-8b,INT4-MIXED,33,4919.4,261.9,67.2,14.88 | ||
llama-2-7b-chat-hf,INT4-MIXED,1024,4948.2,3811,67.3,14.86 | ||
qwen1.5-7b-chat,INT4-MIXED,32,5943.1,180.5,67.7,14.77 | ||
qwen-7b-chat-gptq,INT4-MIXED,32,8057,187,68.1,14.68 | ||
llama-3-8b,INT4-MIXED,32,5503.5,198.4,68.1,14.68 | ||
qwen-7b-chat,INT4-MIXED,32,8091.6,185.9,68.1,14.68 | ||
llama-3-8b,INT4-MIXED,1024,5569.1,3920.5,68.2,14.66 | ||
llama-3.1-8b,INT4-MIXED,31,5358.6,201,68.2,14.66 | ||
stablelm-7b,INT4-MIXED,1020,5804.4,3726.6,68.8,14.53 | ||
llama-3.1-8b,INT4-MIXED,31,5452.6,202.9,68.8,14.53 | ||
llama-2-7b-chat-hf,INT4-MIXED,32,5023,165.7,69,14.49 | ||
llama-3-8b,INT4-MIXED,32,5413.6,202,69.1,14.47 | ||
llama-3-8b,INT4-MIXED,33,5440.4,262.1,69.2,14.45 | ||
codegen25-7b,INT4-MIXED,1024,5434.6,3513.2,69.9,14.31 | ||
mistral-7b-v0.1,INT4-MIXED,1024,5614.9,3819.1,70,14.29 | ||
mistral-7b-v0.1,INT4-MIXED,31,4927.8,205,70.5,14.18 | ||
llama-3-8b,INT4-MIXED,33,5498.9,270.7,70.6,14.16 | ||
llama-3-8b,INT4-MIXED,1025,5577.4,4271.2,70.6,14.16 | ||
llama-2-7b-gptq,INT4-MIXED,1024,5302.2,3529.4,70.7,14.14 | ||
zephyr-7b-beta,INT4-MIXED,32,5212.4,190.6,71.2,14.04 | ||
llama-3-8b,INT4-MIXED,1024,6161.1,3918,71.5,13.99 | ||
llama-3-8b,INT4-MIXED,1025,6098,4441.8,72.3,13.83 | ||
llama-3-8b,INT4-MIXED,1024,6071.7,3972.2,72.4,13.81 | ||
mistral-7b-v0.1,INT4-MIXED,1007,5224.1,4153.4,73.8,13.55 | ||
llama-3-8b,INT4-MIXED,1025,6156.9,4357,73.9,13.53 | ||
zephyr-7b-beta,INT4-MIXED,1024,5511.6,3978,74.4,13.44 | ||
opt-2.7b,FP16,31,9220.3,107.8,74.7,13.39 | ||
dolly-v2-3b,FP16,32,6058.9,109.9,74.7,13.39 | ||
qwen1.5-7b-chat,INT4-MIXED,1024,7063.2,3791.7,75,13.33 | ||
qwen-7b-chat,INT4-MIXED,1024,8919.5,3763.9,75,13.33 | ||
red-pajama-incite-chat-3b-v1,FP16,32,6036.5,107.5,75.9,13.18 | ||
llama-2-7b-chat-hf,INT4-MIXED,1024,5716.8,4231.7,76.2,13.12 | ||
phi-2,FP16,32,6090.1,115.2,77.1,12.97 | ||
stable-zephyr-3b-dpo,FP16,30,6113.1,112.1,78.6,12.72 | ||
qwen-7b-chat,INT4-MIXED,1024,9212.9,3857.4,78.6,12.72 | ||
stablelm-3b-4e1t,FP16,32,6065.4,110.2,78.7,12.71 | ||
opt-2.7b,FP16,937,9733.8,3750.8,78.8,12.69 | ||
dolly-v2-3b,FP16,1024,6615.2,2230.9,79.1,12.64 | ||
red-pajama-incite-chat-3b-v1,FP16,1020,6588.3,2259.4,80.2,12.47 | ||
glm-4-9b,INT4-MIXED,33,6386.2,328,80.4,12.44 | ||
red-pajama-incite-chat-3b-v1,FP16,1024,6570.3,2268.7,80.4,12.44 | ||
baichuan2-7b-chat,INT4-MIXED,32,5977.9,201.7,81,12.35 | ||
glm-4-9b,INT4-MIXED,32,6389.7,248.1,81,12.35 | ||
phi-2,FP16,1024,6646.2,2406.7,81.4,12.29 | ||
stable-zephyr-3b-dpo,FP16,946,6875.7,1868.2,82.9,12.06 | ||
stablelm-3b-4e1t,FP16,1024,6636.1,2036.9,83,12.05 | ||
chatglm2-6b,INT8-CW,32,6731.8,159.2,84.4,11.85 | ||
glm-4-9b,INT4-MIXED,1025,7061.4,4939.2,85.2,11.74 | ||
qwen-7b-chat-gptq,INT4-MIXED,1024,9175.3,3898,85.3,11.72 | ||
gemma-7b-it,INT4-MIXED,32,7883.9,230.5,86,11.63 | ||
gemma-7b-it,INT4-MIXED,32,8002.6,235,86.1,11.61 | ||
glm-4-9b,INT4-MIXED,1024,7064.9,4411.2,86.2,11.60 | ||
gpt-j-6b,INT8-CW,32,7009.2,176.8,86.4,11.57 | ||
chatglm2-6b,INT8-CW,1024,7050.5,3871.6,86.8,11.52 | ||
chatglm3-6b,INT8-CW,32,6755.9,159,86.8,11.52 | ||
baichuan2-7b-chat,INT4-MIXED,1024,7033.3,4049,88.8,11.26 | ||
chatglm3-6b,INT8-CW,1024,7076.5,3865.9,89.2,11.21 | ||
qwen-7b-chat,INT4-MIXED,32,9245.7,176.3,90,11.11 | ||
gemma-7b-it,INT4-MIXED,1024,9449.4,4305.8,93.2,10.73 | ||
gpt-j-6b,INT8-CW,1024,7672.3,4181.1,93.5,10.70 | ||
gemma-7b-it,INT4-MIXED,1024,9330.5,4222.5,93.7,10.67 | ||
orca-mini-3b,FP16,32,7416.5,122.3,94.7,10.56 | ||
codegen25-7b,INT8-CW,32,7557.6,170.7,98.4,10.16 | ||
qwen-7b-chat,INT4-MIXED,1024,10371.1,4271.7,98.9,10.11 | ||
llama-2-7b-chat-hf,INT8-CW,32,7390.6,171.6,99.9,10.01 | ||
opt-125m-gptq,INT4-MIXED,32,1116,25.8,8.1,123.5 | ||
opt-125m-gptq,INT4-MIXED,1024,1187.1,75.2,8.2,122.0 | ||
qwen2-0.5b,INT4-MIXED,32,1587.4,45.1,15.4,64.9 | ||
qwen2-0.5b,INT4-MIXED,1024,1587.8,228.2,15.6,64.1 | ||
tiny-llama-1.1b-chat,INT4-MIXED,32,1704.2,42.4,17.6,56.8 | ||
tiny-llama-1.1b-chat,INT4-MIXED,1024,1616.3,489.2,18.9,52.9 | ||
qwen2-0.5b,INT8-CW,32,1477.3,51.5,20.2,49.5 | ||
qwen2-0.5b,INT8-CW,1024,1592,263.7,20.6,48.5 | ||
tiny-llama-1.1b-chat,INT8-CW,32,1855.6,60.2,20.7,48.3 | ||
tiny-llama-1.1b-chat,INT8-CW,1024,1992.6,618.2,21.7,46.1 | ||
qwen2-1.5b,INT4-MIXED,32,2024.2,59.6,23.1,43.3 | ||
bloomz-560m,FP16,1024,2773.1,647.8,23.8,42.0 | ||
qwen2-1.5b,INT4-MIXED,1024,2177.7,577.4,23.8,42.0 | ||
bloomz-560m,FP16,32,2582.7,44.2,25.1,39.8 | ||
dolly-v2-3b,INT4-MIXED,32,2507.9,79.8,29.4,34.0 | ||
phi-2,INT4-MIXED,32,2568.9,74.6,29.7,33.7 | ||
qwen2-1.5b,INT8-CW,32,2577.3,81.6,30.5,32.8 | ||
red-pajama-incite-chat-3b-v1,INT4-MIXED,32,2489.4,69.9,30.5,32.8 | ||
minicpm-1b-sft,INT4-MIXED,31,2442.1,84.7,31,32.3 | ||
qwen2-1.5b,INT8-CW,1024,2739.8,773.3,31.2,32.1 | ||
gemma-2b-it,INT4-MIXED,32,2998.2,103.5,31.4,31.8 | ||
dolly-v2-3b,INT4-MIXED,1024,2508.1,1396.6,32,31.3 | ||
gemma-2b-it,INT4-MIXED,1024,3171.5,822.3,32.2,31.1 | ||
phi-2,INT4-MIXED,1024,2940.5,1395.3,32.2,31.1 | ||
red-pajama-incite-chat-3b-v1,INT4-MIXED,1023,2489.6,1435.5,33.1,30.2 | ||
minicpm-1b-sft,INT8-CW,31,2818.6,86.9,33.4,29.9 | ||
stable-zephyr-3b-dpo,INT4-MIXED,32,2638.2,87.4,33.8,29.6 | ||
stablelm-3b-4e1t,INT4-MIXED,32,2750.5,89.4,35.6,28.1 | ||
stablelm-3b-4e1t,INT4-MIXED,1023,3115.5,1473.1,38.1,26.2 | ||
phi-3-mini-4k-instruct,INT4-MIXED,32,3039.1,109.2,40.4,24.8 | ||
phi-2,INT8-CW,32,3599.7,107.5,42.1,23.8 | ||
gemma-2b-it,INT8-CW,32,3845.4,111.3,42.2,23.7 | ||
dolly-v2-3b,INT8-CW,32,3596.4,110.1,42.5,23.5 | ||
gemma-2b-it,INT8-CW,1024,3844.6,1183,43,23.3 | ||
red-pajama-incite-chat-3b-v1,INT8-CW,32,3590,111,43.3,23.1 | ||
phi-3-mini-4k-instruct,INT4-MIXED,1024,3467.6,1721.6,43.5,23.0 | ||
stablelm-3b-4e1t,INT8-CW,32,3582.8,111,44.3,22.6 | ||
stable-zephyr-3b-dpo,INT8-CW,32,3607.2,110.2,44.5,22.5 | ||
phi-2,INT8-CW,1024,3982,1508,44.6,22.4 | ||
dolly-v2-3b,INT8-CW,1024,3596.5,1529.1,44.9,22.3 | ||
minicpm-1b-sft,FP16,31,3769.9,84,45.4,22.0 | ||
red-pajama-incite-chat-3b-v1,INT8-CW,1023,3952,2064.5,45.7,21.9 | ||
stablelm-3b-4e1t,INT8-CW,1023,3934.5,2286.3,46.8,21.4 | ||
gpt-j-6b,INT4-MIXED,32,4443.5,159.3,56.7,17.6 | ||
phi-3-mini-4k-instruct,INT8-CW,32,4545,117.1,57.6,17.4 | ||
phi-3-mini-4k-instruct,INT8-CW,1024,4810.4,2068.8,60.5,16.5 | ||
gpt-j-6b,INT4-MIXED,1024,4746.4,2397,60.6,16.5 | ||
falcon-7b-instruct,INT4-MIXED,32,5014,203.7,61.3,16.3 | ||
qwen2-7b,INT4-MIXED,32,5269.4,203.8,62.3,16.1 | ||
codegen25-7b,INT4-MIXED,32,4641.1,170.6,63.5,15.7 | ||
llama-2-7b-gptq,INT4-MIXED,32,4597.3,172.1,63.5,15.7 | ||
falcon-7b-instruct,INT4-MIXED,1024,5230.6,2695.3,63.6,15.7 | ||
qwen2-7b,INT4-MIXED,1024,5370.8,2505.9,63.9,15.6 | ||
decilm-7b-instruct,INT4-MIXED,36,4614.2,301.1,65.3,15.3 | ||
codegen25-7b,INT4-MIXED,1024,4641.9,2629.6,67.4,14.8 | ||
llama-2-7b-gptq,INT4-MIXED,1024,4928.1,2584.3,67.6,14.8 | ||
mistral-7b-v0.1,INT4-MIXED,32,4928.5,180.9,69.2,14.5 | ||
llama-2-7b-chat-hf,INT4-MIXED,32,4985.7,160.3,69.5,14.4 | ||
qwen-7b-chat-gptq,INT4-MIXED,32,5426.7,188.3,69.5,14.4 | ||
llama-3-8b,INT4-MIXED,33,5473.4,285.7,70,14.3 | ||
flan-t5-xxl,INT4-MIXED,33,19293.8,211.7,70.1,14.3 | ||
llama-3-8b,INT4-MIXED,33,5389.2,281,70.8,14.1 | ||
mistral-7b-v0.1,INT4-MIXED,1024,5225.4,2713.3,71.8,13.9 | ||
zephyr-7b-beta,INT4-MIXED,32,5306.1,177.9,72.1,13.9 | ||
llama-3-8b,INT4-MIXED,1025,5615.2,2937.8,72.4,13.8 | ||
llama-3-8b,INT4-MIXED,1025,5531.7,2815.4,73.2,13.7 | ||
llama-2-7b-chat-hf,INT4-MIXED,1024,5319.5,2736.2,73.6,13.6 | ||
phi-2,FP16,32,6197,104.6,74.7,13.4 | ||
zephyr-7b-beta,INT4-MIXED,1024,5306.4,2802.3,74.7,13.4 | ||
qwen-7b-chat-gptq,INT4-MIXED,1024,5934.9,2606.9,75,13.3 | ||
dolly-v2-3b,FP16,32,6195.1,105.3,75.3,13.3 | ||
baichuan2-7b-chat,INT4-MIXED,32,5837.9,188.5,76.8,13.0 | ||
red-pajama-incite-chat-3b-v1,FP16,32,6178.6,118,76.8,13.0 | ||
gemma-7b-it,INT4-MIXED,32,6495.9,230.6,77,13.0 | ||
stablelm-3b-4e1t,FP16,32,6174.2,105.9,77.1,13.0 | ||
stable-zephyr-3b-dpo,FP16,32,6217.8,107.9,77.2,13.0 | ||
glm-4-9b-chat,INT4-MIXED,32,6333.4,225,77.3,12.9 | ||
phi-2,FP16,1024,6411.5,2065.2,77.3,12.9 | ||
dolly-v2-3b,FP16,1024,6410.1,2075,77.7,12.9 | ||
llama-3.1-8b,INT4-MIXED,32,6324.6,182.2,78.8,12.7 | ||
red-pajama-incite-chat-3b-v1,FP16,1023,6394.2,2752.4,79.2,12.6 | ||
stablelm-3b-4e1t,FP16,1023,6386.9,2953.3,79.5,12.6 | ||
glm-4-9b-chat,INT4-MIXED,1024,6439.5,3282.2,80,12.5 | ||
baichuan2-7b-chat,INT4-MIXED,1024,6174.1,2752.6,80.6,12.4 | ||
gemma-7b-it,INT4-MIXED,1024,6795.4,3118.3,80.6,12.4 | ||
llama-3.1-8b,INT4-MIXED,1024,6324.8,2865.7,81.3,12.3 | ||
gpt-j-6b,INT8-CW,32,6793.2,167.6,85,11.8 | ||
qwen-7b-chat,INT4-MIXED,32,7274.8,168.8,85.2,11.7 | ||
gpt-j-6b,INT8-CW,1024,6793.3,2668.4,88.8,11.3 | ||
qwen-7b-chat,INT4-MIXED,1024,7610.3,2991.9,90.6,11.0 | ||
flan-t5-xxl,INT4-MIXED,1139,23514,540.8,94.9,10.5 | ||
falcon-7b-instruct,INT8-CW,32,7764.1,181.3,95.5,10.5 | ||
llama-2-7b-chat-hf,INT8-CW,32,7330.9,172,96.1,10.4 | ||
falcon-7b-instruct,INT8-CW,1024,7987.4,3072.8,98.1,10.2 | ||
qwen2-7b,INT8-CW,32,8175.3,211.3,99.6,10.0 |
Oops, something went wrong.