Update app.py
Browse files
app.py
CHANGED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
# Leaderboard results, written one model per row so that each model's scores
# can be read (and edited) on a single line. The page text describes these
# numbers as F1 scores. ``None`` marks a value that is absent for that model;
# pandas stores it as NaN in the numeric column.
_COLUMNS = (
    "Model",
    "T (Full)",
    "TA (Full)",
    "TAC (Full)",
    "T (Segmented)",
    "TA (Segmented)",
    "TAC (Segmented)",
)

_ROWS = (
    ("DeepSeek-R1 (671B MoE)", 63.89, 45.81, 21.29, 71.18, 47.29, 21.36),
    ("DeepSeek-V3 (671B MoE)", 64.91, 47.10, 23.65, 77.56, 51.35, 28.17),
    ("Llama-3.1 (8B)", 57.80, 40.69, 19.08, 71.86, 43.42, 21.81),
    ("Llama-3.1 (70B)", 63.37, 45.51, 19.29, 76.51, 50.64, 23.96),
    ("Llama-3.1 (405B)", 62.83, 45.82, 22.67, 78.60, 49.50, 25.38),
    ("Mistral (7B)", 49.94, 34.78, 14.13, 63.73, 39.23, 19.83),
    ("Mixtral-8x22B (141B MoE)", 57.60, 39.86, 16.49, 70.12, 41.75, 19.56),
    ("Qwen2.5 (7B)", 56.06, 41.01, 19.54, 65.57, 38.16, 20.76),
    ("Qwen2.5 (72B)", 65.65, 47.42, 19.65, 79.20, 48.36, 22.77),
    ("Claude 3.5 Haiku", 56.94, 40.66, 19.75, 68.18, 43.61, 21.54),
    ("Claude 3.5 Sonnet", 65.32, 43.38, 22.54, 75.90, 46.47, 24.60),
    ("Gemini 1.5 Flash", 56.90, 38.20, 18.97, 72.09, 44.83, 21.31),
    ("Gemini 2.0 Flash", 58.36, 37.47, 19.20, 70.05, 42.11, 21.43),
    ("Gemini 1.5 Pro", 62.78, 43.25, 21.26, 76.56, 46.38, 22.87),
    ("Gemini 2.0 Pro", 60.75, 44.59, 18.63, 74.66, 47.18, 24.20),
    ("GPT-4o", 57.43, 37.64, 15.35, 76.76, 45.90, 24.50),
    ("LLaVa-v1.6-Mistral (7B)", None, None, None, 16.46, 11.45, 3.30),
    ("Gemini 1.5 Flash (MLLM)", 64.91, 45.06, 20.66, 86.23, 54.21, 23.27),
    ("Gemini 2.0 Flash (MLLM)", 64.79, 40.07, 19.74, 82.24, 48.00, 23.52),
    ("Gemini 1.5 Pro (MLLM)", 66.22, 46.90, 23.23, 86.01, 53.51, 24.97),
    ("Gemini 2.0 Pro (MLLM)", 66.42, 46.17, 18.25, 86.04, 54.28, 25.21),
    ("GPT-4o (MLLM)", 64.95, 44.53, 19.60, 83.47, 51.15, 27.86),
)

# Pivot the rows back into the column-keyed mapping (column name -> list of
# values, in row order) that the rest of the module knows as ``data``.
data = {
    column: list(values)
    for column, values in zip(_COLUMNS, zip(*_ROWS))
}

# Tabular form of ``data``; this is the object the UI displays.
df = pd.DataFrame(data)
|
24 |
+
|
25 |
+
def display_leaderboard() -> pd.DataFrame:
    """Return the module-level leaderboard table.

    The same ``df`` object is handed back on every call; no copy is made
    and nothing is recomputed.

    Returns:
        The leaderboard ``DataFrame`` built at import time.
    """
    leaderboard = df
    return leaderboard
|
27 |
+
|
28 |
+
# Build the page: a heading, a one-sentence description, and the results
# table rendered read-only.
with gr.Blocks(title="VideoConviction LLM Leaderboard") as demo:
    gr.Markdown(value="# VideoConviction Benchmark Leaderboard")
    gr.Markdown(
        value="This leaderboard shows the F1 scores of various LLMs and MLLMs across the VideoConviction benchmark tasks."
    )
    # The function object itself (not its result) is given to the component,
    # so Gradio decides when to call it; interactive=False disables editing.
    gr.Dataframe(value=display_leaderboard, interactive=False)

# Start the web server for the app defined above.
demo.launch()
|