ko-freshqa-leaderboard / ui /leaderboard_tab.py
jisubae
feat: Add optional HF dataset sync for leaderboard
4a43fed
"""
λ¦¬λ”λ³΄λ“œ νƒ­ UI μ»΄ν¬λ„ŒνŠΈ
πŸ† Leaderboard νƒ­μ˜ UI와 λ‘œμ§μ„ κ΄€λ¦¬ν•©λ‹ˆλ‹€.
"""
import gradio as gr
import pandas as pd
from src.leaderboard_manager import load_leaderboard_data
def create_leaderboard_tab():
"""Build the leaderboard tab UI.

Creates the search row, the Relaxed and Strict leaderboard tables and the
event handlers that connect them.

Returns:
    A tuple of the Relaxed table component, the Strict table component and
    the ``refresh_leaderboard`` callback.
"""
# Unified search bar at the very top (improved design): one wide search box
# followed by two narrow button columns.
with gr.Row():
with gr.Column(scale=12):
# Free-text search box; its value is matched against the submitter id.
search_input = gr.Textbox(
label="제좜자 이름 검색",
placeholder="πŸ” 제좜자 μ΄λ¦„μœΌλ‘œ 검색...",
value="",
container=False,
elem_classes=["search-input"]
)
with gr.Column(scale=1, min_width=100):
# Reset button; wired to clear_search further down in this function.
clear_search_btn = gr.Button(
"πŸ—‘οΈ μ΄ˆκΈ°ν™”",
variant="secondary",
size="sm",
elem_classes=["clear-search-btn"]
)
with gr.Column(scale=1, min_width=100):
# Refresh button; wired to refresh_leaderboard further down in this function.
refresh_btn = gr.Button(
"πŸ”„ μƒˆλ‘œκ³ μΉ¨",
variant="primary",
size="sm",
elem_classes=["refresh-btn"]
)
# Leaderboard columns and their display names.
# Each key is a source column; each value is the header shown in the table.
# Insertion order is the left-to-right order of the columns.
COLUMN_LABELS = dict(
    rank='Rank',
    id='ID',
    model='Model',
    description='Description',
    accuracy='Accuracy',
    fast_changing_accuracy='Fast-changing',
    slow_changing_accuracy='Slow-changing',
    never_changing_accuracy='Never-changing',
    acc_vp='Valid Premise',
    acc_fp='False Premise',
    acc_vp_one_hop='VP One-hop',
    acc_vp_two_hop='VP Multi-hop',
    acc_fp_one_hop='FP One-hop',
    acc_fp_two_hop='FP Multi-hop',
    acc_politics='Politics',
    acc_sports='Sports',
    acc_entertainment='Entertainment',
    acc_weather='Weather',
    acc_world='World',
    acc_economy='Economy',
    acc_society='Society',
    acc_it_science='IT/Science',
    acc_life_culture='Life/Culture',
    acc_unknown='Unknown',
)
# Ordered list of the source columns exposed on the leaderboard.
DISPLAY_COLUMNS = list(COLUMN_LABELS)
def prepare_display_data(df: pd.DataFrame, global_ranking=None) -> pd.DataFrame:
    """Prepare leaderboard rows for table display.

    Works on a copy of ``df``: fills default ``model`` / ``description``
    values, coerces the score columns to numbers, adds a ``rank`` column and
    rounds the scores to two decimals (for display only).

    Args:
        df: Raw leaderboard rows. ``None`` and empty frames are accepted.
        global_ranking: Optional mapping (or callable) from index label to
            rank. When given, rows keep their order and ``rank`` is looked up
            per index label. Otherwise rows are sorted by ``accuracy``
            (descending) and ranked by position, with medal markers for the
            first three places.

    Returns:
        A new frame ready for display. An empty frame is returned when ``df``
        is ``None``; an empty ``df`` is returned unchanged.
    """
    if df is None:
        return pd.DataFrame()
    if df.empty:
        return df

    display_df = df.copy()

    # Default values for model / description.
    if "model" in display_df.columns:
        display_df["model"] = (
            display_df["model"]
            .fillna("Anonymous Model")
            .replace("", "Anonymous Model")
        )
    if "description" in display_df.columns:
        # fillna already covers None, NaN and pd.NA.
        display_df["description"] = display_df["description"].fillna("")

    score_names = (
        "accuracy",
        "fast_changing_accuracy",
        "slow_changing_accuracy",
        "never_changing_accuracy",
        "acc_vp",
        "acc_fp",
        "acc_vp_one_hop",
        "acc_vp_two_hop",
        "acc_fp_one_hop",
        "acc_fp_two_hop",
        "acc_vp_old",
        "acc_vp_new",
        "acc_fp_old",
        "acc_fp_new",
        "acc_politics",
        "acc_sports",
        "acc_entertainment",
        "acc_weather",
        "acc_world",
        "acc_economy",
        "acc_society",
        "acc_it_science",
        "acc_life_culture",
        "acc_unknown",
    )
    score_columns = [col for col in score_names if col in display_df.columns]

    # Coerce scores to numbers up front: object-dtype columns (e.g. numeric
    # strings or stray text) would otherwise sort lexicographically and make
    # ``round`` raise. Unparseable values become NaN.
    for col in score_columns:
        display_df[col] = pd.to_numeric(display_df[col], errors="coerce")

    # Add the rank column.
    if "accuracy" in display_df.columns:
        if global_ranking is not None:
            # The caller supplies the overall ranking, keyed by index label.
            display_df["rank"] = display_df.index.map(global_ranking)
        else:
            # Rank by accuracy; a stable sort keeps tied rows in input order.
            display_df = display_df.sort_values(
                "accuracy", ascending=False, kind="mergesort"
            ).reset_index(drop=True)
            medals = {1: "πŸ₯‡", 2: "πŸ₯ˆ", 3: "πŸ₯‰"}
            display_df["rank"] = [
                medals.get(position, str(position))
                for position in range(1, len(display_df) + 1)
            ]

    # Round the scores to two decimals (display only).
    for col in score_columns:
        display_df[col] = display_df[col].round(2)
    return display_df
def format_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
    """Select the columns shown on the leaderboard and rename their headers.

    Columns are taken in ``DISPLAY_COLUMNS`` order and renamed through
    ``COLUMN_LABELS``. An empty input yields an empty frame that still carries
    every display column, so the table keeps its header row.
    """
    if df.empty:
        # No rows: build an empty frame with the full column layout.
        visible = DISPLAY_COLUMNS
        base = pd.DataFrame(columns=DISPLAY_COLUMNS)
    else:
        # Keep only the display columns that the data actually has.
        visible = [name for name in DISPLAY_COLUMNS if name in df.columns]
        base = df[visible].copy()

    headers = {name: COLUMN_LABELS[name] for name in visible if name in COLUMN_LABELS}
    return base.rename(columns=headers)
def build_leaderboard_state(source_df: pd.DataFrame):
    """Split the data into Relaxed / Strict display tables.

    Returns:
        A tuple ``(relaxed, strict, is_empty)``: the two tables after
        ``prepare_display_data`` and ``format_leaderboard``, and a flag that
        is True when neither evaluation mode has any row.
    """
    frame = pd.DataFrame() if source_df is None else source_df

    if not frame.empty and 'evaluation_mode' in frame.columns:
        relaxed_rows = frame.query("evaluation_mode == 'Relaxed'")
        strict_rows = frame.query("evaluation_mode == 'Strict'")
    else:
        # No data, or no mode column to split on: both tables are empty.
        relaxed_rows = pd.DataFrame()
        strict_rows = pd.DataFrame()

    relaxed_view = format_leaderboard(prepare_display_data(relaxed_rows))
    strict_view = format_leaderboard(prepare_display_data(strict_rows))
    nothing_to_show = relaxed_rows.empty and strict_rows.empty
    return relaxed_view, strict_view, nothing_to_show
# Initial values, computed once at the time the app UI is built.
leaderboard_data = load_leaderboard_data()
relaxed_initial, strict_initial, is_initial_empty = build_leaderboard_state(leaderboard_data)
# Relaxed-mode leaderboard: heading plus a read-only table.
with gr.Column(elem_classes=["leaderboard-group"]):
gr.Markdown(
"### 🟒 Relaxed Evaluation"
)
relaxed_leaderboard_table = gr.DataFrame(
value=relaxed_initial,
interactive=False,
wrap=False,
show_label=False,
elem_classes=["leaderboard-table"]
)
# Strict-mode leaderboard: heading plus a read-only table.
with gr.Column(elem_classes=["leaderboard-group"]):
gr.Markdown(
"### πŸ”΄ Strict Evaluation"
)
strict_leaderboard_table = gr.DataFrame(
value=strict_initial,
interactive=False,
wrap=False,
show_label=False,
elem_classes=["leaderboard-table"]
)
# Explanatory text about the leaderboard (static Markdown).
with gr.Column(elem_classes=["leaderboard-group"]):
gr.Markdown("""
이 λ¦¬λ”λ³΄λ“œλŠ” [FreshQA](https://github.com/freshllms/freshqa)μ—μ„œ μ˜κ°μ„ λ°›μ•„ λ§Œλ“€μ–΄μ‘ŒμŠ΅λ‹ˆλ‹€.
fact type(fast changing, slow changing, never changing), μ „μ œμ˜ μ§„μ‹€μ„±,
10개의 도메인에 따라 λ‚˜λ‰˜λŠ” μ§ˆλ¬Έλ“€μ„ 톡해 ν•œκ΅­μ–΄ 지식과 κ΄€λ ¨λœ LLM의 μ΅œμ‹ μ„±μ„ νŒλ‹¨ν•  수 μžˆμŠ΅λ‹ˆλ‹€.
이 λ¦¬λ”λ³΄λ“œλŠ” IITP의 **β€œμƒμ„±ν˜• μ–Έμ–΄λͺ¨λΈμ˜ 지속가λŠ₯μ„±κ³Ό μ‹œκ°„μ˜ 흐름에 λ”°λ₯Έ μ΅œμ‹ μ„± λ°˜μ˜μ„ μœ„ν•œ ν•™μŠ΅ 및 ν™œμš© 기술 κ°œλ°œβ€** μ‚¬μ—…μ˜ 지원을 λ°›μ•„ μ œμž‘λ˜μ—ˆμŠ΅λ‹ˆλ‹€.
결과의 λ¬΄κ²°μ„±Β·μœ νš¨μ„±μ„ μœ μ§€ν•˜κ³  **μˆœμœ„ μ‘°μž‘μ„ λ°©μ§€**ν•˜κΈ° μœ„ν•΄ 평가 λ°μ΄ν„°μ…‹μ˜ 정닡은 κΈ°λ°€λ‘œ μœ μ§€λ©λ‹ˆλ‹€.
""")
# Unified search filter (applies to both the Relaxed and the Strict table).
def filter_leaderboard_data(search_text):
    """Filter the Relaxed and Strict leaderboards by submitter id.

    Reloads the full data set with ``load_leaderboard_data`` and keeps the
    rows whose ``id`` contains ``search_text`` as a case-insensitive, literal
    substring.

    Args:
        search_text: Text typed into the search box. ``None``, an empty
            string or whitespace-only text means "no filter".

    Returns:
        A tuple ``(relaxed, strict)`` of formatted tables. If anything fails,
        the error is printed and two empty frames are returned.
    """
    try:
        # Load the complete data set.
        all_df = load_leaderboard_data()

        # Use the stripped text for both the emptiness check and the match,
        # so surrounding whitespace never prevents a hit.
        query = (search_text or "").strip()

        # Apply the search filter (submitter id only).
        if query and all_df is not None and 'id' in all_df.columns:
            ids = all_df['id']
            # regex=False: the input is user text, not a pattern, so characters
            # such as "(" or "[" must not raise or act as wildcards.
            # astype(str) lets non-string ids be searched; notna() keeps
            # missing ids from matching their "nan" string form.
            mask = ids.notna() & ids.astype(str).str.contains(
                query, case=False, regex=False
            )
            filtered_df = all_df[mask]
        else:
            filtered_df = all_df

        formatted_relaxed, formatted_strict, _ = build_leaderboard_state(filtered_df)
        return formatted_relaxed, formatted_strict
    except Exception as e:
        # Best effort: a failed reload must not break the UI callback.
        print(f"❌ λ¦¬λ”λ³΄λ“œ 데이터 필터링 μ‹€νŒ¨: {e}")
        empty = pd.DataFrame()
        return empty, empty
# Connect the search event: every change of the search box re-filters both tables.
search_input.change(
fn=filter_leaderboard_data,
inputs=[search_input],
outputs=[relaxed_leaderboard_table, strict_leaderboard_table]
)
# Search reset button handler.
def clear_search():
    """Clear the search box and reload both leaderboards without a filter.

    Returns:
        A tuple ``("", relaxed, strict)``. If loading fails, the error is
        printed and the two tables are empty frames.
    """
    try:
        relaxed_view, strict_view, _ = build_leaderboard_state(load_leaderboard_data())
    except Exception as e:
        print(f"❌ λ¦¬λ”λ³΄λ“œ 데이터 λ‘œλ“œ μ‹€νŒ¨: {e}")
        blank = pd.DataFrame()
        return "", blank, blank
    return "", relaxed_view, strict_view
# The reset button clears the search box and restores both tables.
clear_search_btn.click(
fn=clear_search,
outputs=[search_input, relaxed_leaderboard_table, strict_leaderboard_table]
)
# Refresh button handler.
def refresh_leaderboard():
    """Reload the leaderboard data and rebuild both tables.

    Returns:
        A tuple ``(relaxed, strict)`` of formatted tables. If loading fails,
        the error is printed and two empty frames are returned.
    """
    try:
        relaxed_view, strict_view, _ = build_leaderboard_state(load_leaderboard_data())
    except Exception as e:
        print(f"❌ λ¦¬λ”λ³΄λ“œ μƒˆλ‘œκ³ μΉ¨ μ‹€νŒ¨: {e}")
        blank = pd.DataFrame()
        return blank, blank
    return relaxed_view, strict_view
# The refresh button reloads the data into both tables.
refresh_btn.click(
fn=refresh_leaderboard,
outputs=[relaxed_leaderboard_table, strict_leaderboard_table]
)
# Return the tables and the refresh callback so app.py can reuse them for the initial load as well.
return relaxed_leaderboard_table, strict_leaderboard_table, refresh_leaderboard