shahagam4 commited on
Commit
4badc89
·
verified ·
1 Parent(s): b0d0508

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -69
app.py CHANGED
@@ -11,14 +11,6 @@ data = {
11
  "LLaVa-v1.6-Mistral (7B)",
12
  "Gemini 1.5 Flash (MLLM)", "Gemini 2.0 Flash (MLLM)", "Gemini 1.5 Pro (MLLM)", "Gemini 2.0 Pro (MLLM)", "GPT-4o (MLLM)"
13
  ],
14
- "Type": [
15
- "LLM", "LLM", "LLM", "LLM", "LLM",
16
- "LLM", "LLM", "LLM", "LLM",
17
- "LLM", "LLM", "LLM", "LLM",
18
- "LLM", "LLM", "LLM",
19
- "MLLM",
20
- "MLLM", "MLLM", "MLLM", "MLLM", "MLLM"
21
- ],
22
  "T (Full)": [63.89, 64.91, 57.80, 63.37, 62.83, 49.94, 57.60, 56.06, 65.65, 56.94, 65.32, 56.90, 58.36, 62.78, 60.75, 57.43, None, 64.91, 64.79, 66.22, 66.42, 64.95],
23
  "TA (Full)": [45.81, 47.10, 40.69, 45.51, 45.82, 34.78, 39.86, 41.01, 47.42, 40.66, 43.38, 38.20, 37.47, 43.25, 44.59, 37.64, None, 45.06, 40.07, 46.90, 46.17, 44.53],
24
  "TAC (Full)": [21.29, 23.65, 19.08, 19.29, 22.67, 14.13, 16.49, 19.54, 19.65, 19.75, 22.54, 18.97, 19.20, 21.26, 18.63, 15.35, None, 20.66, 19.74, 23.23, 18.25, 19.60],
@@ -27,70 +19,16 @@ data = {
27
  "TAC (Segmented)": [21.36, 28.17, 21.81, 23.96, 25.38, 19.83, 19.56, 20.76, 22.77, 21.54, 24.60, 21.31, 21.43, 22.87, 24.20, 24.50, 3.30, 23.27, 23.52, 24.97, 25.21, 27.86]
28
  }
29
 
 
30
  df = pd.DataFrame(data)
31
 
32
- def filter_leaderboard(view="Segmented", model_type="All"):
33
- columns = ["Model", "Type"]
34
- if view == "Segmented":
35
- columns += ["T (Segmented)", "TA (Segmented)", "TAC (Segmented)"]
36
- else:
37
- columns += ["T (Full)", "TA (Full)", "TAC (Full)"]
38
-
39
- if model_type == "All":
40
- return df[columns].sort_values(by=columns[-1], ascending=False)
41
- else:
42
- return df[df["Type"] == model_type][columns].sort_values(by=columns[-1], ascending=False)
43
 
 
44
  with gr.Blocks(title="VideoConviction LLM Leaderboard") as demo:
45
- gr.Markdown("# 📊 VideoConviction Benchmark Leaderboard")
46
- gr.Markdown("""
47
- This leaderboard displays the performance (F1 scores) of various Large Language Models (LLMs) and Multimodal LLMs (MLLMs) on the **VideoConviction** benchmark.
48
-
49
- ---
50
-
51
- ### 📘 About the Benchmark
52
- **VideoConviction** is a multimodal benchmark designed to evaluate **human conviction** and **stock market recommendation quality** from video content. It integrates vision, audio, and text to evaluate reasoning and persuasion detection in financial decision-making.
53
-
54
- πŸ“ **Paper**: [VideoConviction: A Multimodal Benchmark for Human Conviction and Stock Market Recommendations](https://doi.org/10.1145/3711896.3737417)
55
- πŸ“ **Conference**: ACM SIGKDD 2025
56
- 👥 **Authors**: Michael Galarnyk, Veer Kejriwal, Agam Shah, Yash Bhardwaj, Nicholas Watney Meyer, Anand Krishnan, Sudheer Chava
57
-
58
- ---
59
-
60
- ### 📄 Citation
61
- ```bibtex
62
- @inproceedings{galarnyk2025videoconviction,
63
- author = {Michael Galarnyk and Veer Kejriwal and Agam Shah and Yash Bhardwaj and Nicholas Watney Meyer and Anand Krishnan and Sudheer Chava},
64
- title = {VideoConviction: A Multimodal Benchmark for Human Conviction and Stock Market Recommendations},
65
- booktitle = {Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2 (KDD '25)},
66
- year = {2025},
67
- location = {Toronto, ON, Canada},
68
- pages = {12},
69
- publisher = {ACM},
70
- doi = {10.1145/3711896.3737417}
71
- }
72
- ```""")
73
-
74
- with gr.Row():
75
- with gr.Column():
76
- view_dropdown = gr.Dropdown(
77
- label="Select View Type", choices=["Segmented", "Full"], value="Segmented"
78
- )
79
- with gr.Column():
80
- model_dropdown = gr.Dropdown(
81
- label="Filter by Model Type", choices=["All", "LLM", "MLLM"], value="All"
82
- )
83
-
84
- leaderboard = gr.Dataframe(
85
- value=filter_leaderboard("Segmented", "All"),
86
- label="Leaderboard",
87
- interactive=False
88
- )
89
-
90
- def update_leaderboard(view, model_type):
91
- return filter_leaderboard(view, model_type)
92
-
93
- view_dropdown.change(fn=update_leaderboard, inputs=[view_dropdown, model_dropdown], outputs=leaderboard)
94
- model_dropdown.change(fn=update_leaderboard, inputs=[view_dropdown, model_dropdown], outputs=leaderboard)
95
 
96
  demo.launch()
 
11
  "LLaVa-v1.6-Mistral (7B)",
12
  "Gemini 1.5 Flash (MLLM)", "Gemini 2.0 Flash (MLLM)", "Gemini 1.5 Pro (MLLM)", "Gemini 2.0 Pro (MLLM)", "GPT-4o (MLLM)"
13
  ],
 
 
 
 
 
 
 
 
14
  "T (Full)": [63.89, 64.91, 57.80, 63.37, 62.83, 49.94, 57.60, 56.06, 65.65, 56.94, 65.32, 56.90, 58.36, 62.78, 60.75, 57.43, None, 64.91, 64.79, 66.22, 66.42, 64.95],
15
  "TA (Full)": [45.81, 47.10, 40.69, 45.51, 45.82, 34.78, 39.86, 41.01, 47.42, 40.66, 43.38, 38.20, 37.47, 43.25, 44.59, 37.64, None, 45.06, 40.07, 46.90, 46.17, 44.53],
16
  "TAC (Full)": [21.29, 23.65, 19.08, 19.29, 22.67, 14.13, 16.49, 19.54, 19.65, 19.75, 22.54, 18.97, 19.20, 21.26, 18.63, 15.35, None, 20.66, 19.74, 23.23, 18.25, 19.60],
 
19
  "TAC (Segmented)": [21.36, 28.17, 21.81, 23.96, 25.38, 19.83, 19.56, 20.76, 22.77, 21.54, 24.60, 21.31, 21.43, 22.87, 24.20, 24.50, 3.30, 23.27, 23.52, 24.97, 25.21, 27.86]
20
  }
21
 
22
+ # Create DataFrame
23
  df = pd.DataFrame(data)
24
 
25
+ def display_leaderboard():
26
+ return df
 
 
 
 
 
 
 
 
 
27
 
28
+ # Create a simple Gradio interface
29
  with gr.Blocks(title="VideoConviction LLM Leaderboard") as demo:
30
+ gr.Markdown("# VideoConviction Benchmark Leaderboard")
31
+ gr.Markdown("This leaderboard shows the F1 scores of various LLMs and MLLMs across the VideoConviction benchmark tasks.")
32
+ gr.Dataframe(display_leaderboard, interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  demo.launch()