[update] table 2
- app.py +69 -1
- bias_evaluation_data.csv +16 -0
app.py
CHANGED
@@ -18,6 +18,9 @@ LEADERBOARD_DF = pd.read_csv("leaderboard_data.csv")
 # Ensure all data is treated as string initially for display consistency
 LEADERBOARD_DF = LEADERBOARD_DF.astype(str)
 
+BIAS_DF = pd.read_csv("bias_evaluation_data.csv")
+BIAS_DF = BIAS_DF.fillna("-").astype(str)  # Fill NaN with '-' to match the LaTeX table; must run before astype(str), which would turn NaN into the string "nan"
+
 
 demo = gr.Blocks(css=custom_css)
 with demo:
@@ -93,7 +96,72 @@ with demo:
             )
 
         with gr.TabItem("π Bias-aware evaluation of VLM ", elem_id="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+            with gr.Column():
+                gr.Markdown("### Bias-Aware Evaluation Results")  # Title for this section
+                bias_table_output = gr.DataFrame(value=BIAS_DF, label="Bias Evaluation Results", interactive=True, wrap=True)
+
+                gr.Markdown("---")
+                gr.Markdown("### Display Options for Bias Table")
+
+                bias_all_columns_list = BIAS_DF.columns.tolist()
+                bias_column_selector = gr.CheckboxGroup(
+                    choices=bias_all_columns_list,
+                    value=bias_all_columns_list,
+                    label="Select Columns to Display:"
+                )
+
+                # Filter by Bias_Type
+                bias_type_filter_choices = BIAS_DF["Bias_Type"].unique().tolist() if "Bias_Type" in BIAS_DF.columns else []
+                bias_type_selector = gr.CheckboxGroup(
+                    choices=bias_type_filter_choices,
+                    value=bias_type_filter_choices,
+                    label="Filter by Bias Type:"
+                )
+
+                # Filter by Model (for the bias table)
+                bias_model_filter_choices = BIAS_DF["Model"].unique().tolist() if "Model" in BIAS_DF.columns else []
+                bias_model_selector = gr.CheckboxGroup(
+                    choices=bias_model_filter_choices,
+                    value=bias_model_filter_choices,
+                    label="Filter by Model:"
+                )
+
+                def update_bias_table(selected_cols, selected_bias_types, selected_models):
+                    temp_df = BIAS_DF.copy()
+
+                    if selected_bias_types and "Bias_Type" in temp_df.columns:
+                        temp_df = temp_df[temp_df["Bias_Type"].isin(selected_bias_types)]
+                    elif not selected_bias_types and "Bias_Type" in temp_df.columns:  # no bias types selected
+                        temp_df = pd.DataFrame(columns=BIAS_DF.columns)
+
+                    if selected_models and "Model" in temp_df.columns:
+                        temp_df = temp_df[temp_df["Model"].isin(selected_models)]
+                    elif not selected_models and "Model" in temp_df.columns:  # no models selected
+                        # If no bias types are selected either, show an empty frame outright.
+                        if not selected_bias_types:
+                            temp_df = pd.DataFrame(columns=BIAS_DF.columns)
+                        # If bias types ARE selected but no models are, honour the empty model
+                        # selection: keep the columns but drop every row.
+                        elif "Bias_Type" in temp_df.columns and temp_df["Bias_Type"].isin(selected_bias_types).any():
+                            temp_df = temp_df[~temp_df["Model"].isin(BIAS_DF["Model"].unique())]  # effectively no rows
+
+                    valid_selected_cols = [col for col in selected_cols if col in temp_df.columns]
+                    if not valid_selected_cols and not temp_df.empty:
+                        final_df = temp_df  # no columns chosen: fall back to the full table
+                    elif not valid_selected_cols and temp_df.empty:
+                        final_df = pd.DataFrame(columns=selected_cols)
+                    else:
+                        final_df = temp_df[valid_selected_cols]
+
+                    return gr.DataFrame.update(value=final_df)
+
+                bias_column_selector.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector, bias_model_selector], outputs=[bias_table_output])
+                bias_type_selector.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector, bias_model_selector], outputs=[bias_table_output])
+                bias_model_selector.change(fn=update_bias_table, inputs=[bias_column_selector, bias_type_selector, bias_model_selector], outputs=[bias_table_output])
+
+                # The original gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text") is replaced by the table and its controls.
+                # To keep showing LLM_BENCHMARKS_TEXT as well, add it back here, e.g.:
+                # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
     with gr.Row():
         with gr.Accordion("π Citation", open=False):
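Note on the new BIAS_DF lines: the NaN handling is order-sensitive, because astype(str) stringifies missing values to "nan", leaving fillna("-") nothing to replace. A minimal standalone pandas sketch of the difference (illustrative frame, no Gradio assumed):

import numpy as np
import pandas as pd

df = pd.DataFrame({"Model": ["InstructBLIP"], "CLIP-S": [np.nan]})

# Wrong order: astype(str) turns NaN into the string "nan" before fillna runs.
print(df.astype(str).fillna("-")["CLIP-S"].iloc[0])  # -> "nan"

# Right order: fill while the value is still a real NaN, then stringify.
print(df.fillna("-").astype(str)["CLIP-S"].iloc[0])  # -> "-"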
bias_evaluation_data.csv
ADDED
@@ -0,0 +1,16 @@
+Bias_Type,Model,CLIP-S,CapS_S,CapS_A,Recall,Noun,Verb,Syn,Sem,CH_s,FS,FS_s,Harm,N-avg
+Gender bias,MiniGPT-4,0.3,0.9,1.1,7.8,1.7,2.6,6.3,3.2,4.8,6.3,4.0,1.64,0.51
+Gender bias,InstructBLIP,0.8,2.7,1.2,8.4,1.9,3.3,1.0,0.1,6.8,3.8,5.0,0.72,0.40
+Gender bias,LLaVA-1.5,0.7,2.2,0.7,9.5,2.2,4.1,1.5,0.2,7.6,3.8,3.7,0.39,0.46
+Gender bias,mPLUG-Owl2,0.6,2.2,1.2,9.1,2.3,3.5,1.6,0.0,7.2,3.1,5.8,0.33,0.40
+Gender bias,Qwen2-VL,0.2,0.7,0.5,6.3,0.1,3.6,13.5,2.5,4.4,0.9,5.7,1.77,0.63
+Skin tone bias,MiniGPT-4,0.8,1.5,0.8,4.8,0.2,2.3,19.4,0.2,2.0,0.9,0.5,0.09,0.55
+Skin tone bias,InstructBLIP,0.5,1.4,0.2,8.4,1.9,1.1,6.8,0.1,4.0,2.4,1.1,0.09,0.51
+Skin tone bias,LLaVA-1.5,0.4,1.3,0.7,4.0,0.2,1.0,5.3,0.6,2.7,1.4,1.3,0.18,0.67
+Skin tone bias,mPLUG-Owl2,0.6,1.9,0.5,5.1,0.8,2.2,7.6,0.4,1.7,0.1,0.4,0.00,0.67
+Skin tone bias,Qwen2-VL,0.2,1.1,1.5,2.3,0.5,1.3,14.9,2.3,2.7,3.1,1.8,0.09,0.50
+Language discrepancy,MiniGPT-4,0.8,1.5,3.9,2.3,4.3,5.2,52.2,5.0,5.4,5.6,3.4,0.10,0.40
+Language discrepancy,InstructBLIP,,,,,,,,,,,,,
+Language discrepancy,LLaVA-1.5,0.4,0.8,2.0,1.1,1.1,1.8,11.4,1.8,4.7,2.0,1.6,0.06,0.95
+Language discrepancy,mPLUG-Owl2,1.4,1.6,4.9,1.5,1.1,3.7,37.5,8.4,17.0,6.3,1.3,0.02,0.57
+Language discrepancy,Qwen2-VL,0.2,3.6,6.7,1.9,3.9,3.8,90.8,26.2,6.4,7.5,2.1,0.14,0.28
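For reference, the net effect of update_bias_table on this data: deselecting every bias type or every model yields an empty table; otherwise rows are intersected across both filters and projected onto the chosen columns. A standalone sketch of that behaviour (filter_bias_table is an illustrative helper, not part of the commit; assumes the CSV sits next to the script):

import pandas as pd

BIAS_DF = pd.read_csv("bias_evaluation_data.csv").fillna("-").astype(str)

def filter_bias_table(df, bias_types, models, cols):
    # Intersect the two row filters, then keep only the requested columns.
    out = df[df["Bias_Type"].isin(bias_types) & df["Model"].isin(models)]
    valid = [c for c in cols if c in out.columns]
    return out[valid] if valid else out

print(filter_bias_table(BIAS_DF, ["Gender bias"], ["Qwen2-VL"], ["Model", "Harm", "N-avg"]))
# One row: Qwen2-VL with Harm 1.77 and N-avg 0.63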