[data] add user types
- .DS_Store +0 -0
- app.py +41 -1
- src/about.py +1 -1
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
app.py
CHANGED
@@ -1,5 +1,7 @@
 import gradio as gr
 import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
 
 from src.about import (
     CITATION_BUTTON_LABEL,
@@ -28,7 +30,7 @@ with demo:
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("Unified performance evaluation of VLM captioners", elem_id="llm-benchmark-tab-table", id=0):
+        with gr.TabItem("🧠 Unified performance evaluation of VLM captioners", elem_id="llm-benchmark-tab-table", id=0):
             with gr.Column():
                 # 1. Display the table first
                 # Make DataFrame interactive for sorting
@@ -163,6 +165,44 @@ with demo:
             # If you still want to show LLM_BENCHMARKS_TEXT, you can add it here, e.g.:
             # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
+        with gr.TabItem("🧑‍🍳 User Type and Preference-Oriented Scores ", elem_id="llm-benchmark-tab-table", id=3):
+            with gr.Column():
+                gr.Markdown("### Preference-Oriented Scores by User Type and Model")
+
+                def create_preference_score_chart():
+                    # User types and model names
+                    user_types = ['Detail-oriented', 'Risk-conscious', 'Accuracy-focused']
+                    models = ['MiniGPT-4', 'InstructBLIP', 'LLaVA-1.5', 'mPLUG-Owl2', 'Qwen2-VL']
+
+                    # Data
+                    scores = np.array([
+                        [0.20, 0.35, 0.45, 0.50, 0.85],  # Detail-oriented
+                        [0.40, 0.55, 0.60, 0.60, 0.58],  # Risk-conscious
+                        [0.20, 0.65, 0.90, 0.70, 0.75]   # Accuracy-focused
+                    ])
+
+                    x = np.arange(len(user_types))
+                    width = 0.15
+
+                    fig, ax = plt.subplots(figsize=(12, 7))  # Increased figure size for better readability
+
+                    for i, model in enumerate(models):
+                        ax.bar(x + i * width - (width * (len(models) - 1) / 2), scores[:, i], width, label=model)  # Centered bars
+
+                    ax.set_xlabel('User type', fontsize=12)
+                    ax.set_ylabel('Preference-oriented score', fontsize=12)
+                    ax.set_title('Preference-oriented scores by User Type and Model', fontsize=14)
+                    ax.set_xticks(x)
+                    ax.set_xticklabels(user_types, fontsize=10)
+                    ax.legend(title='Model', bbox_to_anchor=(1.05, 1), loc='upper left')  # Legend outside plot
+
+                    plt.ylim(0, 1.1)
+                    plt.grid(axis='y', linestyle='--', alpha=0.7)
+                    plt.tight_layout(rect=[0, 0, 0.85, 1])  # Adjust layout to make space for legend
+                    return fig
+
+                gr.Plot(value=create_preference_score_chart)
+
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
             citation_button = gr.Textbox(
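The only non-obvious line in the new chart code is the bar offset x + i * width - (width * (len(models) - 1) / 2): each model's bars are shifted right by i * width, then the whole group is pulled back by half its total span so it stays centred on the user-type tick. A minimal sketch of that arithmetic (plain numpy; the names n_models and offsets are illustrative and not part of the diff):

import numpy as np

n_models, width = 5, 0.15  # five models, same bar width as in app.py
offsets = np.arange(n_models) * width - width * (n_models - 1) / 2
print(offsets)  # -> [-0.3, -0.15, 0.0, 0.15, 0.3], symmetric around each x tick (up to float rounding)

Note also that gr.Plot is given the function itself rather than a figure, so Gradio evaluates create_preference_score_chart when the app loads instead of once at build time.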
src/about.py
CHANGED
@@ -25,7 +25,7 @@ TITLE = """<h1 align="center" id="space-title">🪷 LOTUS: A Leaderboard for Det
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-
+(ACL 2025 Industry Track) Large Vision-Language Models (LVLMs) have transformed image captioning, shifting from concise captions to detailed, context-rich descriptions. We introduce LOTUS, a unified leaderboard for evaluating such detailed captions, addressing three main gaps in existing evaluation approaches: lack of standardized criteria, absence of bias-aware assessments, and evaluations that disregard user preferences.
 """
 
 # Which evaluations are you running? how can people reproduce what you have?