huckiyang committed on
Commit
35d0ee5
·
1 Parent(s): cf40e88

[data] add user types

Files changed (3)
  1. .DS_Store +0 -0
  2. app.py +41 -1
  3. src/about.py +1 -1
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -1,5 +1,7 @@
 import gradio as gr
 import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
 
 from src.about import (
     CITATION_BUTTON_LABEL,
@@ -28,7 +30,7 @@ with demo:
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("Unified performance evaluation of VLM captioners", elem_id="llm-benchmark-tab-table", id=0):
+        with gr.TabItem("🧠 Unified performance evaluation of VLM captioners", elem_id="llm-benchmark-tab-table", id=0):
             with gr.Column():
                 # 1. Display the table first
                 # Make DataFrame interactive for sorting
@@ -163,6 +165,44 @@ with demo:
                 # If you still want to show LLM_BENCHMARKS_TEXT, you can add it here, e.g.:
                 # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
+        with gr.TabItem("🧑‍🍳 User Type and Preference-Oriented Scores ", elem_id="llm-benchmark-tab-table", id=3):
+            with gr.Column():
+                gr.Markdown("### Preference-Oriented Scores by User Type and Model")
+
+                def create_preference_score_chart():
+                    # User types and model names
+                    user_types = ['Detail-oriented', 'Risk-conscious', 'Accuracy-focused']
+                    models = ['MiniGPT-4', 'InstructBLIP', 'LLaVA-1.5', 'mPLUG-Owl2', 'Qwen2-VL']
+
+                    # Data
+                    scores = np.array([
+                        [0.20, 0.35, 0.45, 0.50, 0.85],  # Detail-oriented
+                        [0.40, 0.55, 0.60, 0.60, 0.58],  # Risk-conscious
+                        [0.20, 0.65, 0.90, 0.70, 0.75]   # Accuracy-focused
+                    ])
+
+                    x = np.arange(len(user_types))
+                    width = 0.15
+
+                    fig, ax = plt.subplots(figsize=(12, 7))  # Increased figure size for better readability
+
+                    for i, model in enumerate(models):
+                        ax.bar(x + i * width - (width * (len(models) - 1) / 2), scores[:, i], width, label=model)  # Centered bars
+
+                    ax.set_xlabel('User type', fontsize=12)
+                    ax.set_ylabel('Preference-oriented score', fontsize=12)
+                    ax.set_title('Preference-oriented scores by User Type and Model', fontsize=14)
+                    ax.set_xticks(x)
+                    ax.set_xticklabels(user_types, fontsize=10)
+                    ax.legend(title='Model', bbox_to_anchor=(1.05, 1), loc='upper left')  # Legend outside plot
+
+                    plt.ylim(0, 1.1)
+                    plt.grid(axis='y', linestyle='--', alpha=0.7)
+                    plt.tight_layout(rect=[0, 0, 0.85, 1])  # Adjust layout to make space for legend
+                    return fig
+
+                gr.Plot(value=create_preference_score_chart)
+
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
             citation_button = gr.Textbox(
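Two notes on the added chart code. Passing the function itself (rather than its return value) to gr.Plot(value=...) defers figure creation until the app loads, and the offset term x + i * width - (width * (len(models) - 1) / 2) spreads the five model bars symmetrically around each user-type tick. A minimal, purely illustrative sketch of that centering arithmetic, reusing only the committed names models and width:

models = ['MiniGPT-4', 'InstructBLIP', 'LLaVA-1.5', 'mPLUG-Owl2', 'Qwen2-VL']
width = 0.15

# Offset of each model's bar from its group's x tick; the offsets are
# symmetric around 0, so the group of bars stays centered on the label.
offsets = [i * width - width * (len(models) - 1) / 2 for i in range(len(models))]
print([round(o, 2) for o in offsets])  # [-0.3, -0.15, 0.0, 0.15, 0.3]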
src/about.py CHANGED
@@ -25,7 +25,7 @@ TITLE = """<h1 align="center" id="space-title">🪷 LOTUS: A Leaderboard for Det
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-Intro text
+(ACL 2025 Industry Track) Large Vision-Language Models (LVLMs) have transformed image captioning, shifting from concise captions to detailed, context-rich descriptions. We introduce LOTUS, a unified leaderboard for evaluating such detailed captions, addressing three main gaps in existing evaluation approaches: lack of standardized criteria, absence of bias-aware assessments, and evaluations that disregard user preferences.
 """
 
 # Which evaluations are you running? how can people reproduce what you have?