huckiyang committed
Commit 67a3cba · 1 Parent(s): 7182724

[update] table 2
Files changed (2)
  1. app.py +69 -1
  2. bias_evaluation_data.csv +16 -0
app.py CHANGED
@@ -18,6 +18,9 @@ LEADERBOARD_DF = pd.read_csv("leaderboard_data.csv")
 # Ensure all data is treated as string initially for display consistency
 LEADERBOARD_DF = LEADERBOARD_DF.astype(str)
 
+BIAS_DF = pd.read_csv("bias_evaluation_data.csv")
+BIAS_DF = BIAS_DF.astype(str).fillna("-")  # Fill NaN with '-' to be consistent with how it looks in the LaTeX table
+
 
 demo = gr.Blocks(css=custom_css)
 with demo:
@@ -93,7 +96,72 @@ with demo:
             )
 
         with gr.TabItem("📝 Bias-aware evaluation of VLM ", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+            with gr.Column():
+                gr.Markdown("### Bias-Aware Evaluation Results")  # Title for this section
+                bias_table_output = gr.DataFrame(value=BIAS_DF, label="Bias Evaluation Results", interactive=True, wrap=True)
+
+                gr.Markdown("---")
+                gr.Markdown("### Display Options for Bias Table")
+
+                bias_all_columns_list = BIAS_DF.columns.tolist()
+                bias_column_selector = gr.CheckboxGroup(
+                    choices=bias_all_columns_list,
+                    value=bias_all_columns_list,
+                    label="Select Columns to Display:"
+                )
+
+                # Filter by Bias_Type
+                bias_type_filter_choices = BIAS_DF["Bias_Type"].unique().tolist() if "Bias_Type" in BIAS_DF.columns else []
+                bias_type_selector = gr.CheckboxGroup(
+                    choices=bias_type_filter_choices,
+                    value=bias_type_filter_choices,
+                    label="Filter by Bias Type:"
+                )
+
+                # Filter by Model (for the bias table)
+                bias_model_filter_choices = BIAS_DF["Model"].unique().tolist() if "Model" in BIAS_DF.columns else []
+                bias_model_selector = gr.CheckboxGroup(
+                    choices=bias_model_filter_choices,
+                    value=bias_model_filter_choices,
+                    label="Filter by Model:"
+                )
+
+                def update_bias_table(selected_cols, selected_bias_types, selected_models):
+                    temp_df = BIAS_DF.copy()
+
+                    if selected_bias_types and "Bias_Type" in temp_df.columns:
+                        temp_df = temp_df[temp_df["Bias_Type"].isin(selected_bias_types)]
+                    elif not selected_bias_types and "Bias_Type" in temp_df.columns:  # no bias types selected
+                        temp_df = pd.DataFrame(columns=BIAS_DF.columns)
+
+                    if selected_models and "Model" in temp_df.columns:
+                        temp_df = temp_df[temp_df["Model"].isin(selected_models)]
+                    elif not selected_models and "Model" in temp_df.columns:  # no models selected
+                        if not selected_bias_types:  # bias types also unselected: show an empty table
+                            temp_df = pd.DataFrame(columns=BIAS_DF.columns)
+                        # If bias types ARE selected but no models are, the user has deselected
+                        # every model, so show no rows for the selected bias types:
+                        elif "Bias_Type" in temp_df.columns and temp_df["Bias_Type"].isin(selected_bias_types).any():
+                            temp_df = temp_df[~temp_df["Model"].isin(BIAS_DF["Model"].unique())]  # effectively no models
+
+                    valid_selected_cols = [col for col in selected_cols if col in temp_df.columns]
+                    if not valid_selected_cols and not temp_df.empty:
+                        final_df = temp_df
+                    elif not valid_selected_cols and temp_df.empty:
+                        final_df = pd.DataFrame(columns=selected_cols)
+                    else:
+                        final_df = temp_df[valid_selected_cols]
+
+                    return gr.DataFrame.update(value=final_df)
+
+                bias_column_selector.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector, bias_model_selector], outputs=[bias_table_output])
+                bias_type_selector.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector, bias_model_selector], outputs=[bias_table_output])
+                bias_model_selector.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector, bias_model_selector], outputs=[bias_table_output])
+
+                # The original gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text") is replaced
+                # by the table and its controls. To keep showing LLM_BENCHMARKS_TEXT, add it back here:
+                # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
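One caveat when reproducing the loading step in the first hunk: pandas' `astype(str)` converts `NaN` into the literal string `"nan"`, so the `fillna("-")` chained *after* the cast has nothing left to fill. A minimal sketch of the presumably intended behavior, with the two calls swapped (my reading, not part of this commit):

```python
import pandas as pd

# Load the bias table added in this commit.
bias_df = pd.read_csv("bias_evaluation_data.csv")

# Fill missing cells *before* casting to str: astype(str) turns NaN into
# the string "nan", after which fillna("-") would no longer match anything.
bias_df = bias_df.fillna("-").astype(str)

# The all-empty InstructBLIP row under "Language discrepancy" now shows "-".
print(bias_df[(bias_df["Bias_Type"] == "Language discrepancy") & (bias_df["Model"] == "InstructBLIP")])
```

Note also that `gr.DataFrame.update(value=...)` is the Gradio 3.x idiom; on Gradio 4 and later the handler would simply return `final_df` (or `gr.DataFrame(value=final_df)`).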
bias_evaluation_data.csv ADDED
@@ -0,0 +1,16 @@
+Bias_Type,Model,CLIP-S,CapS_S,CapS_A,Recall,Noun,Verb,Syn,Sem,CH_s,FS,FS_s,Harm,N-avg
+Gender bias,MiniGPT-4,0.3,0.9,1.1,7.8,1.7,2.6,6.3,3.2,4.8,6.3,4.0,1.64,0.51
+Gender bias,InstructBLIP,0.8,2.7,1.2,8.4,1.9,3.3,1.0,0.1,6.8,3.8,5.0,0.72,0.40
+Gender bias,LLaVA-1.5,0.7,2.2,0.7,9.5,2.2,4.1,1.5,0.2,7.6,3.8,3.7,0.39,0.46
+Gender bias,mPLUG-Owl2,0.6,2.2,1.2,9.1,2.3,3.5,1.6,0.0,7.2,3.1,5.8,0.33,0.40
+Gender bias,Qwen2-VL,0.2,0.7,0.5,6.3,0.1,3.6,13.5,2.5,4.4,0.9,5.7,1.77,0.63
+Skin tone bias,MiniGPT-4,0.8,1.5,0.8,4.8,0.2,2.3,19.4,0.2,2.0,0.9,0.5,0.09,0.55
+Skin tone bias,InstructBLIP,0.5,1.4,0.2,8.4,1.9,1.1,6.8,0.1,4.0,2.4,1.1,0.09,0.51
+Skin tone bias,LLaVA-1.5,0.4,1.3,0.7,4.0,0.2,1.0,5.3,0.6,2.7,1.4,1.3,0.18,0.67
+Skin tone bias,mPLUG-Owl2,0.6,1.9,0.5,5.1,0.8,2.2,7.6,0.4,1.7,0.1,0.4,0.00,0.67
+Skin tone bias,Qwen2-VL,0.2,1.1,1.5,2.3,0.5,1.3,14.9,2.3,2.7,3.1,1.8,0.09,0.50
+Language discrepancy,MiniGPT-4,0.8,1.5,3.9,2.3,4.3,5.2,52.2,5.0,5.4,5.6,3.4,0.10,0.40
+Language discrepancy,InstructBLIP,,,,,,,,,,,,,
+Language discrepancy,LLaVA-1.5,0.4,0.8,2.0,1.1,1.1,1.8,11.4,1.8,4.7,2.0,1.6,0.06,0.95
+Language discrepancy,mPLUG-Owl2,1.4,1.6,4.9,1.5,1.1,3.7,37.5,8.4,17.0,6.3,1.3,0.02,0.57
+Language discrepancy,Qwen2-VL,0.2,3.6,6.7,1.9,3.9,3.8,90.8,26.2,6.4,7.5,2.1,0.14,0.28
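For reference, the `update_bias_table` handler above boils down to plain pandas boolean filtering over this CSV. A rough standalone sketch (the selections below are arbitrary examples, not values from the commit):

```python
import pandas as pd

# Same loading convention as the app (see the note on fillna/astype ordering above).
BIAS_DF = pd.read_csv("bias_evaluation_data.csv").fillna("-").astype(str)

# Example selections, standing in for the three CheckboxGroup inputs.
selected_bias_types = ["Gender bias"]
selected_models = ["LLaVA-1.5", "Qwen2-VL"]
selected_cols = ["Bias_Type", "Model", "CLIP-S", "Harm", "N-avg"]

# Row filters: keep only the chosen bias types and models.
view = BIAS_DF[
    BIAS_DF["Bias_Type"].isin(selected_bias_types)
    & BIAS_DF["Model"].isin(selected_models)
]

# Column filter: project onto the chosen columns, preserving their order.
view = view[[c for c in selected_cols if c in view.columns]]
print(view)
```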