Spaces:
Paused
Paused
import gradio as gr | |
import pandas as pd | |
# Import our UI factories and the data loader | |
from ui_components import create_leaderboard_display, create_benchmark_details_display, get_full_leaderboard_data, create_sub_navigation_bar | |
def _nav_toggle_js(show_id, hide_id):
    """Return JavaScript source for a tab-select handler that swaps two nav bars.

    The generated arrow function shows the element with id ``show_id``, hides
    the element with id ``hide_id``, and then dispatches a window ``resize``
    event on the next tick.

    Args:
        show_id: DOM id of the container to display.
        hide_id: DOM id of the container to hide.

    Returns:
        A string holding a zero-argument JS arrow function, suitable for the
        ``js=`` argument of a Gradio event listener.
    """
    # Each lookup is guarded: if a container is not present in the DOM, an
    # unguarded `.style` access would throw and the resize below would never run.
    return f"""
    () => {{
        const toShow = document.getElementById('{show_id}');
        const toHide = document.getElementById('{hide_id}');
        if (toShow) {{ toShow.style.display = 'block'; }}
        if (toHide) {{ toHide.style.display = 'none'; }}
        setTimeout(() => {{ window.dispatchEvent(new Event('resize')) }}, 0);
    }}
    """


def _render_split_results(full_df, tag_map, category_name, split_name, intro_markdown):
    """Render the contents of one results tab (leaderboard + benchmark details).

    Must be called inside an open ``gr.Tab`` context. When ``full_df`` is
    empty, only a "No data available" notice is rendered.

    Args:
        full_df: Leaderboard data for the split; only ``.empty`` is inspected
            here, the rest is forwarded to the display factories.
        tag_map: Tag map for the split, forwarded to the display factories.
        category_name: Category whose results are shown.
        split_name: Either ``"test"`` or ``"validation"``.
        intro_markdown: Short Markdown blurb shown above the leaderboard.
    """
    if full_df.empty:
        gr.Markdown(f"No data available for {split_name} split.")
        return

    gr.Markdown(intro_markdown)
    # Main aggregate leaderboard for the category.
    create_leaderboard_display(
        full_df=full_df,
        tag_map=tag_map,
        category_name=category_name,
        split_name=split_name,
    )
    # Detailed breakdown for the benchmarks in the category.
    create_benchmark_details_display(
        full_df=full_df,
        tag_map=tag_map,
        category_name=category_name,
        validation=(split_name == "validation"),
    )


def build_category_page(CATEGORY_NAME, PAGE_DESCRIPTION):
    """Build the Gradio layout for one AstaBench category leaderboard page.

    The page consists of a header, two sub-navigation bars (one per split, of
    which only the test one is initially visible), the page description, and
    a tab set with "Results: Test Set" first and "Results: Validation Set"
    second. Selecting a tab runs client-side JavaScript that shows the
    matching navigation bar and hides the other.

    Args:
        CATEGORY_NAME: Category name; shown in the header and forwarded to
            the navigation and display factories.
        PAGE_DESCRIPTION: Markdown text rendered below the navigation bars.

    Returns:
        A ``(validation_nav_container, test_nav_container)`` tuple of the two
        ``gr.Column`` components wrapping the sub-navigation bars.
    """
    # NOTE(review): these DOM ids are fixed strings, so if several category
    # pages end up in the same document, getElementById would only find the
    # first pair -- confirm how the caller mounts the pages.
    validation_nav_id = "validation_nav_container"
    test_nav_id = "test_nav_container"

    with gr.Column(elem_id="page-content-wrapper"):
        gr.HTML(
            f'<h2>AstaBench {CATEGORY_NAME} Leaderboard <span style="font-weight: normal; color: inherit;">(Aggregate)</span></h2>',
            elem_id="main-header",
        )

        # Load each split once; the same data feeds both the navigation bar
        # and the tab contents below.
        validation_df, validation_tag_map = get_full_leaderboard_data("validation")
        test_df, test_tag_map = get_full_leaderboard_data("test")

        # The test tab is the first tab, so its navigation bar starts visible
        # and the validation one starts hidden.
        with gr.Column(elem_id=validation_nav_id, visible=False) as validation_nav_container:
            create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME, validation=True)
        with gr.Column(elem_id=test_nav_id, visible=True) as test_nav_container:
            create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)

        gr.Markdown(PAGE_DESCRIPTION, elem_id="category-intro")

        # --- Two result sections: Test (first) and Validation ---
        with gr.Tabs():
            with gr.Tab("Results: Test Set") as test_tab:
                _render_split_results(
                    test_df,
                    test_tag_map,
                    CATEGORY_NAME,
                    "test",
                    "**Test Set** results are reserved for final assessment. This helps ensure that the agent generalizes well to unseen problems.",
                )
            with gr.Tab("Results: Validation Set") as validation_tab:
                _render_split_results(
                    validation_df,
                    validation_tag_map,
                    CATEGORY_NAME,
                    "validation",
                    "**Validation Set** results are used during development to tune and compare agents before final testing.",
                )

        # Pure client-side handlers: swap the visible nav bar and fire a
        # resize so plots re-measure themselves. No Python `fn` is needed.
        validation_tab.select(
            fn=None,
            inputs=None,
            outputs=None,
            js=_nav_toggle_js(show_id=validation_nav_id, hide_id=test_nav_id),
        )
        test_tab.select(
            fn=None,
            inputs=None,
            outputs=None,
            js=_nav_toggle_js(show_id=test_nav_id, hide_id=validation_nav_id),
        )

    return validation_nav_container, test_nav_container