[rank] adding rank
Files changed:
- app.py (+56 -49)
- leaderboard_data.csv (+1 -1)
app.py
CHANGED
@@ -19,6 +19,51 @@ from src.display.css_html_js import custom_css
 # Load leaderboard data with multi-header, do not set index initially
 LEADERBOARD_DF_ORIGINAL = pd.read_csv("leaderboard_data.csv", header=[0, 1])
 
+# Calculate Average N-avg and Rank
+# Identify N-avg columns (adjust if names are different in CSV header row 2)
+n_avg_cols_to_average = [
+    ('Alignment', 'N-avg↑'),
+    ('Descriptiveness', 'N-avg↑'),
+    ('Complexity', 'N-avg↑'),
+    ('Side effects', 'N-avg↑')
+]
+
+# Ensure these columns are numeric, coercing errors to NaN (though they should be numbers)
+for col_tuple in n_avg_cols_to_average:
+    if col_tuple in LEADERBOARD_DF_ORIGINAL.columns:
+        LEADERBOARD_DF_ORIGINAL[col_tuple] = pd.to_numeric(LEADERBOARD_DF_ORIGINAL[col_tuple], errors='coerce')
+    else:
+        print(f"Warning: N-avg column {col_tuple} not found for averaging.")  # Add a warning
+
+# Calculate average, handling cases where some N-avg columns might be missing
+existing_n_avg_cols = [col for col in n_avg_cols_to_average if col in LEADERBOARD_DF_ORIGINAL.columns]
+if existing_n_avg_cols:
+    LEADERBOARD_DF_ORIGINAL[('Overall', 'Average N-avg')] = LEADERBOARD_DF_ORIGINAL[existing_n_avg_cols].mean(axis=1)
+    LEADERBOARD_DF_ORIGINAL[('Overall', 'Rank')] = LEADERBOARD_DF_ORIGINAL[('Overall', 'Average N-avg')].rank(method='min', ascending=False).astype(int)
+else:
+    LEADERBOARD_DF_ORIGINAL[('Overall', 'Average N-avg')] = np.nan
+    LEADERBOARD_DF_ORIGINAL[('Overall', 'Rank')] = np.nan
+
+
+# Reorder columns to put Rank and Average N-avg first, then Model, then the rest
+model_col_tuple = ('Model', 'Model')  # Original name of the model column
+rank_col_tuple = ('Overall', 'Rank')
+avg_navg_col_tuple = ('Overall', 'Average N-avg')
+
+new_col_order = []
+if rank_col_tuple in LEADERBOARD_DF_ORIGINAL.columns:
+    new_col_order.append(rank_col_tuple)
+if avg_navg_col_tuple in LEADERBOARD_DF_ORIGINAL.columns:
+    new_col_order.append(avg_navg_col_tuple)
+if model_col_tuple in LEADERBOARD_DF_ORIGINAL.columns:
+    new_col_order.append(model_col_tuple)
+
+for col in LEADERBOARD_DF_ORIGINAL.columns:
+    if col not in new_col_order:
+        new_col_order.append(col)
+LEADERBOARD_DF_ORIGINAL = LEADERBOARD_DF_ORIGINAL[new_col_order]
+
+
 # Function to prepare DataFrame for display (format headers, ensure Model column)
 def format_leaderboard_df_for_display(df_orig):
     df_display = df_orig.copy()
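Note (not part of the diff): the new ranking can be sanity-checked outside the app with the N-avg↑ values that appear in leaderboard_data.csv further down this page. A standalone sketch, with three rows copied by hand rather than read from the file:

import pandas as pd

# The four per-axis N-avg↑ columns the app averages, as a column MultiIndex.
cols = pd.MultiIndex.from_tuples([
    ('Alignment', 'N-avg↑'), ('Descriptiveness', 'N-avg↑'),
    ('Complexity', 'N-avg↑'), ('Side effects', 'N-avg↑'),
])
df = pd.DataFrame(
    [[0.19, 0.22, 0.38, 0.18],    # MiniGPT-4
     [0.18, 0.40, 0.41, 0.66],    # InstructBLIP
     [0.67, 0.11, 0.08, 0.71]],   # LLaVA-1.5
    index=['MiniGPT-4', 'InstructBLIP', 'LLaVA-1.5'],
    columns=cols,
)
avg = df.mean(axis=1)                                       # 0.2425, 0.4125, 0.3925
rank = avg.rank(method='min', ascending=False).astype(int)  # 3, 1, 2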
@@ -40,22 +85,16 @@ BIAS_DF = BIAS_DF.astype(str).fillna("-")
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
-    gr.HTML("""<div style="text-align: center;">
-        <img src="https://huggingface.co/spaces/nvidia/LOTUS-VLM-Bias/blob/main/overview-acl-25.png" alt="Overview ACL 2025" style="width: 75%;">
-    </div>""")
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🧠 Unified performance evaluation of VLM captioners", elem_id="llm-benchmark-tab-table", id=0):
             with gr.Column():
-                # gr.Image("table_snapshot.png", label="Original Table Snapshot", interactive=False) # Removed this line
-
                 table_output = gr.DataFrame(value=LEADERBOARD_DF_DISPLAY_INIT, label="Leaderboard Results", interactive=True, wrap=True)
 
                 gr.Markdown("---")
                 gr.Markdown("### Display Options")
 
-                # Filter choices from the original DataFrame's ('Model', 'Model') column
                 model_filter_choices = LEADERBOARD_DF_ORIGINAL[('Model', 'Model')].unique().tolist()
                 model_selector = gr.CheckboxGroup(
                     choices=model_filter_choices,
@@ -66,8 +105,7 @@ with demo:
                 def update_table(selected_models_from_filter):
                     filtered_df_orig = LEADERBOARD_DF_ORIGINAL.copy()
                     if not selected_models_from_filter:
-
-                        filtered_df_orig = LEADERBOARD_DF_ORIGINAL[LEADERBOARD_DF_ORIGINAL[('Model', 'Model')].isin([])] # Empty but keeps structure
+                        filtered_df_orig = LEADERBOARD_DF_ORIGINAL[LEADERBOARD_DF_ORIGINAL[('Model', 'Model')].isin([])]
                     else:
                         valid_selected_models = [model for model in selected_models_from_filter if model in model_filter_choices]
                         if not valid_selected_models:
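Note (not part of the diff): the .isin([]) filter this hunk keeps is deliberate: matching against an empty list selects zero rows but leaves the MultiIndex columns intact, so the table keeps its headers when every model is deselected. A minimal check, assuming LEADERBOARD_DF_ORIGINAL as loaded above:

empty_view = LEADERBOARD_DF_ORIGINAL[LEADERBOARD_DF_ORIGINAL[('Model', 'Model')].isin([])]
print(len(empty_view))           # 0 rows
print(list(empty_view.columns))  # same (group, metric) column tuples as the full table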
@@ -75,7 +113,6 @@ with demo:
                         else:
                             filtered_df_orig = LEADERBOARD_DF_ORIGINAL[LEADERBOARD_DF_ORIGINAL[('Model', 'Model')].isin(valid_selected_models)]
 
-                    # Format the filtered DataFrame for display
                     df_to_display = format_leaderboard_df_for_display(filtered_df_orig)
                     return gr.DataFrame.update(value=df_to_display)
 
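Note (not part of the diff): gr.DataFrame.update(...) in the return statement is Gradio 3.x API; the per-component .update() methods were removed in Gradio 4.x. If the Space's Gradio pin ever changes, the portable form is a generic gr.update(...). A sketch with a hypothetical helper name, not code from this commit:

import gradio as gr
import pandas as pd

def build_table_update(df_to_display: pd.DataFrame):
    # Equivalent to `gr.DataFrame.update(value=df_to_display)` on Gradio 3.x,
    # and still valid on 4.x, where component-level .update() no longer exists.
    return gr.update(value=df_to_display)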
@@ -87,54 +124,41 @@ with demo:
 
         with gr.TabItem("📝 Bias-aware evaluation of VLM ", elem_id="llm-benchmark-tab-table", id=2):
             with gr.Column():
-                gr.Markdown("### Bias-Aware Evaluation Results")
+                gr.Markdown("### Bias-Aware Evaluation Results")
                 bias_table_output = gr.DataFrame(value=BIAS_DF, label="Bias Evaluation Results", interactive=True, wrap=True)
-
                 gr.Markdown("---")
                 gr.Markdown("### Display Options for Bias Table")
-
                 bias_all_columns_list = BIAS_DF.columns.tolist()
                 bias_column_selector = gr.CheckboxGroup(
                     choices=bias_all_columns_list,
                     value=bias_all_columns_list,
                     label="Select Columns to Display:"
                 )
-
-                # Filter by Bias_Type
                 bias_type_filter_choices = BIAS_DF["Bias_Type"].unique().tolist() if "Bias_Type" in BIAS_DF.columns else []
                 bias_type_selector = gr.CheckboxGroup(
                     choices=bias_type_filter_choices,
                     value=bias_type_filter_choices,
                     label="Filter by Bias Type:"
                 )
-
-                # Filter by Model (for the bias table)
                 bias_model_filter_choices = BIAS_DF["Model"].unique().tolist() if "Model" in BIAS_DF.columns else []
                 bias_model_selector_for_bias_tab = gr.CheckboxGroup(
                     choices=bias_model_filter_choices,
                     value=bias_model_filter_choices,
                     label="Filter by Model:"
                 )
-
                 def update_bias_table(selected_cols, selected_bias_types, selected_models):
                     temp_df = BIAS_DF.copy()
-
                     if selected_bias_types and "Bias_Type" in temp_df.columns:
                         temp_df = temp_df[temp_df["Bias_Type"].isin(selected_bias_types)]
-                    elif not selected_bias_types and "Bias_Type" in temp_df.columns:
+                    elif not selected_bias_types and "Bias_Type" in temp_df.columns:
                         temp_df = pd.DataFrame(columns=BIAS_DF.columns)
-
                     if selected_models and "Model" in temp_df.columns:
                         temp_df = temp_df[temp_df["Model"].isin(selected_models)]
-                    elif not selected_models and "Model" in temp_df.columns:
-
-                        if not selected_bias_types: # If bias types also not selected, then empty
+                    elif not selected_models and "Model" in temp_df.columns:
+                        if not selected_bias_types:
                             temp_df = pd.DataFrame(columns=BIAS_DF.columns)
-                        # if selected_bias_types IS populated, then it means we want all models for those bias types
-                        # but if selected_models is empty, it means filter to show NO models, hence the following line:
-                        elif "Bias_Type" in temp_df.columns and temp_df["Bias_Type"].isin(selected_bias_types).any():
-                            temp_df = temp_df[~temp_df["Model"].isin(BIAS_DF["Model"].unique())]
-
+                        elif "Bias_Type" in temp_df.columns and temp_df["Bias_Type"].isin(selected_bias_types).any():
+                            temp_df = temp_df[~temp_df["Model"].isin(BIAS_DF["Model"].unique())]
                     valid_selected_cols = [col for col in selected_cols if col in temp_df.columns]
                     if not valid_selected_cols and not temp_df.empty:
                         final_df = temp_df
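Note (not part of the diff): the branch kept at the end of this hunk empties the bias table on purpose when bias types are selected but no models are: negating membership in the full model list is False for every row, so nothing survives while the column structure is preserved for display. Illustrative check, assuming BIAS_DF has a "Model" column (the handler itself guards for this):

mask = ~BIAS_DF["Model"].isin(BIAS_DF["Model"].unique())
print(mask.any())                                     # False: no row matches
print(BIAS_DF[mask].columns.equals(BIAS_DF.columns))  # True: headers are kept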
@@ -142,53 +166,37 @@ with demo:
                         final_df = pd.DataFrame(columns=selected_cols)
                     else:
                         final_df = temp_df[valid_selected_cols]
-
                     return gr.DataFrame.update(value=final_df)
-
                 bias_column_selector.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector, bias_model_selector_for_bias_tab], outputs=[bias_table_output])
                 bias_type_selector.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector, bias_model_selector_for_bias_tab], outputs=[bias_table_output])
                 bias_model_selector_for_bias_tab.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector, bias_model_selector_for_bias_tab], outputs=[bias_table_output])
-
-                # The original gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text") is replaced by the table and its controls.
-                # If you still want to show LLM_BENCHMARKS_TEXT, you can add it here, e.g.:
-                # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
         with gr.TabItem("🧑🍳 User Type and Preference-Oriented Scores ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():
                 gr.Markdown("### Preference-Oriented Scores by User Type and Model")
-
                 def create_preference_score_chart():
-                    # User types and model names
                     user_types = ['Detail-oriented', 'Risk-conscious', 'Accuracy-focused']
                     models = ['MiniGPT-4', 'InstructBLIP', 'LLaVA-1.5', 'mPLUG-Owl2', 'Qwen2-VL']
-
-                    # Data
                     scores = np.array([
                         [0.20, 0.35, 0.45, 0.50, 0.85],  # Detail-oriented
                         [0.40, 0.55, 0.67, 0.53, 0.58],  # Risk-conscious
                         [0.20, 0.60, 0.72, 0.69, 0.75]   # Accuracy-focused
                     ])
-
                     x = np.arange(len(user_types))
                     width = 0.15
-
-                    fig, ax = plt.subplots(figsize=(12, 7)) # Increased figure size for better readability
-
+                    fig, ax = plt.subplots(figsize=(12, 7))
                     for i, model in enumerate(models):
-                        ax.bar(x + i * width - (width * (len(models)-1)/2), scores[:, i], width, label=model)
-
+                        ax.bar(x + i * width - (width * (len(models)-1)/2), scores[:, i], width, label=model)
                     ax.set_xlabel('User type', fontsize=12)
                     ax.set_ylabel('Preference-oriented score', fontsize=12)
                     ax.set_title('Preference-oriented scores by User Type and Model', fontsize=14)
                     ax.set_xticks(x)
                     ax.set_xticklabels(user_types, fontsize=10)
-                    ax.legend(title='Model', bbox_to_anchor=(1.05, 1), loc='upper left')
-
+                    ax.legend(title='Model', bbox_to_anchor=(1.05, 1), loc='upper left')
                     plt.ylim(0, 1.1)
                     plt.grid(axis='y', linestyle='--', alpha=0.7)
-                    plt.tight_layout(rect=[0, 0, 0.85, 1])
+                    plt.tight_layout(rect=[0, 0, 0.85, 1])
                     return fig
-
                 gr.Plot(value=create_preference_score_chart)
 
     with gr.Row():
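Note (not part of the diff): the bar-offset expression kept in this hunk, x + i * width - (width * (len(models) - 1) / 2), centres the group of five bars on each x tick: with width = 0.15 the per-model offsets are -0.30, -0.15, 0.00, +0.15, +0.30. A standalone check of the arithmetic:

import numpy as np

width, n_models = 0.15, 5
offsets = np.arange(n_models) * width - width * (n_models - 1) / 2
print(offsets)  # roughly [-0.3, -0.15, 0, 0.15, 0.3]: symmetric around each tick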
@@ -201,8 +209,7 @@ with demo:
                 show_copy_button=True,
             )
 
-
-    gr.Markdown("---") # Visual separator
+    gr.Markdown("---")
     link_to_discussion = "https://huggingface.co/login?next=%2Fspaces%2Fnvidia%2FLOTUS-VLM-Bias%2Fdiscussions%2Fnew"
     gr.HTML(f'''
     <div style="text-align: center; margin-top: 20px; margin-bottom: 20px;">
leaderboard_data.csv
CHANGED
@@ -1,5 +1,5 @@
 Model,Alignment,Alignment,Alignment,Alignment,Descriptiveness,Descriptiveness,Descriptiveness,Descriptiveness,Complexity,Complexity,Complexity,Side effects,Side effects,Side effects,Side effects,Side effects
-Model,CLIP-S,CapS_S,CapS_A,N-avg
+Model,CLIP-S,CapS_S,CapS_A,N-avg↑,Recall,Noun,Verb,N-avg↑,Syn,Sem,N-avg↑,CHs↓,FS↑,FSs↑,Harm↓,N-avg↑
 MiniGPT-4,60.8,33.0,35.9,0.19,75.3,33.0,34.7,0.22,8.0,32.6,0.38,37.8,55.0,37.6,0.31,0.18
 InstructBLIP,59.9,36.0,35.5,0.18,82.1,34.2,34.7,0.40,7.7,46.0,0.41,58.5,62.4,43.3,0.10,0.66
 LLaVA-1.5,60.1,38.5,45.0,0.67,80.5,32.5,31.0,0.11,7.1,39.6,0.08,49.0,65.7,41.6,0.12,0.71
|