Spaces:
Running
Running
| """ | |
| Leaderboard tab UI component. | |
| Manages the UI and logic of the Leaderboard tab. | |
| """ | |
| import gradio as gr | |
| import pandas as pd | |
| from src.leaderboard_manager import load_leaderboard_data | |
def create_leaderboard_tab():
    """Build the leaderboard tab UI and wire up its events.

    Creates, in order: a search row (text box, clear button, refresh button),
    a read-only "Relaxed" table, a read-only "Strict" table and an explanatory
    Markdown section. The tables are populated once at build time from
    ``load_leaderboard_data()`` and again whenever the search text changes or
    one of the buttons is clicked.

    Presumably this must be called inside an active ``gr.Blocks`` context,
    since it instantiates Gradio components directly -- confirm against the
    caller.

    Returns:
        A 3-tuple ``(relaxed_leaderboard_table, strict_leaderboard_table,
        refresh_leaderboard)``: the two ``gr.DataFrame`` components and the
        zero-argument refresh callback, so the caller can reuse the callback
        (e.g. for the initial page load).
    """
    # --- Top search bar: search box + clear button + refresh button --------
    with gr.Row():
        with gr.Column(scale=12):
            search_input = gr.Textbox(
                label="μ μΆμ μ΄λ¦ κ²μ",
                placeholder="π μ μΆμ μ΄λ¦μΌλ‘ κ²μ...",
                value="",
                container=False,
                elem_classes=["search-input"],
            )
        with gr.Column(scale=1, min_width=100):
            clear_search_btn = gr.Button(
                "ποΈ μ΄κΈ°ν",
                variant="secondary",
                size="sm",
                elem_classes=["clear-search-btn"],
            )
        with gr.Column(scale=1, min_width=100):
            refresh_btn = gr.Button(
                "π μλ‘κ³ μΉ¨",
                variant="primary",
                size="sm",
                elem_classes=["refresh-btn"],
            )

    # --- Columns shown in the leaderboard and their header labels ----------
    # Insertion order of this dict is the left-to-right column order.
    COLUMN_LABELS = {
        'rank': 'Rank',
        'id': 'ID',
        'model': 'Model',
        'description': 'Description',
        'accuracy': 'Accuracy',
        'fast_changing_accuracy': 'Fast-changing',
        'slow_changing_accuracy': 'Slow-changing',
        'never_changing_accuracy': 'Never-changing',
        'acc_vp': 'Valid Premise',
        'acc_fp': 'False Premise',
        'acc_vp_one_hop': 'VP One-hop',
        'acc_vp_two_hop': 'VP Multi-hop',
        'acc_fp_one_hop': 'FP One-hop',
        'acc_fp_two_hop': 'FP Multi-hop',
        'acc_politics': 'Politics',
        'acc_sports': 'Sports',
        'acc_entertainment': 'Entertainment',
        'acc_weather': 'Weather',
        'acc_world': 'World',
        'acc_economy': 'Economy',
        'acc_society': 'Society',
        'acc_it_science': 'IT/Science',
        'acc_life_culture': 'Life/Culture',
        'acc_unknown': 'Unknown',
    }
    # Derived from COLUMN_LABELS so the two can never drift apart.
    DISPLAY_COLUMNS = list(COLUMN_LABELS)

    def prepare_display_data(df: pd.DataFrame, global_ranking=None) -> pd.DataFrame:
        """Prepare rows for display: fill defaults, add ``rank``, round scores.

        Args:
            df: Raw leaderboard rows. ``None`` yields a new empty DataFrame;
                an empty DataFrame is returned as-is (not copied).
            global_ranking: Optional mapping/callable passed to
                ``Index.map`` to look up each row's rank from its index
                label. When given, rows are NOT re-sorted and the mapped
                values are used unchanged.

        Returns:
            A copy of ``df`` with display adjustments applied. The ``rank``
            column is only added when an ``accuracy`` column exists.
        """
        if df is None:
            return pd.DataFrame()
        if df.empty:
            return df

        display_df = df.copy()

        # Default values for model / description.
        if "model" in display_df.columns:
            display_df["model"] = (
                display_df["model"]
                .fillna("Anonymous Model")
                .replace("", "Anonymous Model")
            )
        if "description" in display_df.columns:
            # fillna already covers None, NaN and pd.NA.
            display_df["description"] = display_df["description"].fillna("")

        def get_rank_display(rank: int) -> str:
            """Return the display text for a 1-based rank."""
            # NOTE(review): the three literals below are identical in the
            # available source and look mis-encoded; presumably they were
            # distinct gold/silver/bronze medal emoji -- confirm against the
            # original file before restoring them.
            if rank == 1:
                return "π₯"
            if rank == 2:
                return "π₯"
            if rank == 3:
                return "π₯"
            return str(rank)

        # Add the rank column.
        if "accuracy" in display_df.columns:
            if global_ranking is not None:
                # The caller supplied the overall ranking.
                display_df["rank"] = display_df.index.map(global_ranking)
            else:
                # Rank by descending accuracy; ties keep the sorted order.
                display_df = display_df.sort_values(
                    "accuracy", ascending=False
                ).reset_index(drop=True)
                display_df["rank"] = [
                    get_rank_display(position)
                    for position in range(1, len(display_df) + 1)
                ]

        # Round score columns to two decimals (display only; the source
        # DataFrame is untouched because we work on a copy).
        numeric_columns = [
            "accuracy",
            "fast_changing_accuracy",
            "slow_changing_accuracy",
            "never_changing_accuracy",
            "acc_vp",
            "acc_fp",
            "acc_vp_one_hop",
            "acc_vp_two_hop",
            "acc_fp_one_hop",
            "acc_fp_two_hop",
            "acc_vp_old",
            "acc_vp_new",
            "acc_fp_old",
            "acc_fp_new",
            "acc_politics",
            "acc_sports",
            "acc_entertainment",
            "acc_weather",
            "acc_world",
            "acc_economy",
            "acc_society",
            "acc_it_science",
            "acc_life_culture",
            "acc_unknown",
        ]
        for col in numeric_columns:
            if col in display_df.columns:
                display_df[col] = display_df[col].round(2)
        return display_df

    def format_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
        """Select the displayed columns and rename them to header labels.

        An empty input produces a header-only DataFrame containing every
        display column, so the table keeps its column structure.
        """
        if df.empty:
            selected = pd.DataFrame(columns=DISPLAY_COLUMNS)
        else:
            present = [col for col in DISPLAY_COLUMNS if col in df.columns]
            selected = df[present].copy()
        # rename() ignores labels that are not present, so the full map is safe.
        return selected.rename(columns=COLUMN_LABELS)

    def build_leaderboard_state(source_df: pd.DataFrame):
        """Split rows by ``evaluation_mode`` and format both tables.

        Returns:
            ``(formatted_relaxed, formatted_strict, is_empty)`` where
            ``is_empty`` is True when neither mode has any rows. A ``None``
            or empty input, or one without an ``evaluation_mode`` column,
            yields two header-only tables.
        """
        if source_df is None:
            source_df = pd.DataFrame()

        if source_df.empty or 'evaluation_mode' not in source_df.columns:
            relaxed_df = pd.DataFrame()
            strict_df = pd.DataFrame()
        else:
            mode = source_df['evaluation_mode']
            relaxed_df = source_df[mode == 'Relaxed']
            strict_df = source_df[mode == 'Strict']

        formatted_relaxed = format_leaderboard(prepare_display_data(relaxed_df))
        formatted_strict = format_leaderboard(prepare_display_data(strict_df))
        is_empty = relaxed_df.empty and strict_df.empty
        return formatted_relaxed, formatted_strict, is_empty

    def empty_tables():
        """Return a pair of header-only tables used as the error fallback."""
        empty = format_leaderboard(pd.DataFrame())
        return empty, empty

    # Initial values, computed once when the app UI is built.
    relaxed_initial, strict_initial, _ = build_leaderboard_state(
        load_leaderboard_data()
    )

    # --- Relaxed-mode leaderboard ------------------------------------------
    with gr.Column(elem_classes=["leaderboard-group"]):
        gr.Markdown(
            "### π’ Relaxed Evaluation"
        )
        relaxed_leaderboard_table = gr.DataFrame(
            value=relaxed_initial,
            interactive=False,
            wrap=False,
            show_label=False,
            elem_classes=["leaderboard-table"],
        )

    # --- Strict-mode leaderboard -------------------------------------------
    with gr.Column(elem_classes=["leaderboard-group"]):
        gr.Markdown(
            "### π΄ Strict Evaluation"
        )
        strict_leaderboard_table = gr.DataFrame(
            value=strict_initial,
            interactive=False,
            wrap=False,
            show_label=False,
            elem_classes=["leaderboard-table"],
        )

    # --- Explanatory text about the leaderboard ----------------------------
    with gr.Column(elem_classes=["leaderboard-group"]):
        gr.Markdown("""
        μ΄ λ¦¬λ보λλ [FreshQA](https://github.com/freshllms/freshqa)μμ μκ°μ λ°μ λ§λ€μ΄μ‘μ΅λλ€.
        fact type(fast changing, slow changing, never changing), μ μ μ μ§μ€μ±,
        10κ°μ λλ©μΈμ λ°λΌ λλλ μ§λ¬Έλ€μ ν΅ν΄ νκ΅μ΄ μ§μκ³Ό κ΄λ ¨λ LLMμ μ΅μ μ±μ νλ¨ν μ μμ΅λλ€.
        μ΄ λ¦¬λ보λλ IITPμ **βμμ±ν μΈμ΄λͺ¨λΈμ μ§μκ°λ₯μ±κ³Ό μκ°μ νλ¦μ λ°λ₯Έ μ΅μ μ± λ°μμ μν νμ΅ λ° νμ© κΈ°μ κ°λ°β** μ¬μ μ μ§μμ λ°μ μ μλμμ΅λλ€.
        κ²°κ³Όμ 무결μ±Β·μ ν¨μ±μ μ μ§νκ³ **μμ μ‘°μμ λ°©μ§**νκΈ° μν΄ νκ° λ°μ΄ν°μ μ μ λ΅μ κΈ°λ°λ‘ μ μ§λ©λλ€.
        """)

    def filter_leaderboard_data(search_text):
        """Filter both tables by a case-insensitive substring of ``id``.

        The full dataset is reloaded via ``load_leaderboard_data()`` on every
        call (described as CSV-backed in the original comments). A blank or
        ``None`` query, or a dataset without an ``id`` column, shows all rows.

        Returns:
            ``(formatted_relaxed, formatted_strict)``; header-only tables if
            loading or filtering fails.
        """
        try:
            all_df = load_leaderboard_data()
            query = (search_text or "").strip()
            if query and all_df is not None and 'id' in all_df.columns:
                # regex=False: treat the query literally so characters such
                # as "(" or "*" cannot raise or match unexpectedly. Casting
                # to str keeps the search working for non-string id columns;
                # missing ids become "" so they never match.
                ids = all_df['id'].fillna("").astype(str)
                mask = ids.str.contains(query, case=False, na=False, regex=False)
                filtered_df = all_df[mask]
            else:
                filtered_df = all_df
            formatted_relaxed, formatted_strict, _ = build_leaderboard_state(filtered_df)
            return formatted_relaxed, formatted_strict
        except Exception as e:
            # UI callback boundary: log and degrade to empty tables rather
            # than surfacing an error to the page.
            print(f"β 리λ보λ λ°μ΄ν° νν°λ§ μ€ν¨: {e}")
            return empty_tables()

    # Re-filter whenever the search text changes.
    search_input.change(
        fn=filter_leaderboard_data,
        inputs=[search_input],
        outputs=[relaxed_leaderboard_table, strict_leaderboard_table],
    )

    def clear_search():
        """Clear the search box and reload both tables unfiltered.

        Returns:
            ``("", formatted_relaxed, formatted_strict)``; header-only tables
            if loading fails.
        """
        try:
            all_df = load_leaderboard_data()
            formatted_relaxed, formatted_strict, _ = build_leaderboard_state(all_df)
            return "", formatted_relaxed, formatted_strict
        except Exception as e:
            print(f"β 리λ보λ λ°μ΄ν° λ‘λ μ€ν¨: {e}")
            return ("", *empty_tables())

    clear_search_btn.click(
        fn=clear_search,
        outputs=[search_input, relaxed_leaderboard_table, strict_leaderboard_table],
    )

    def refresh_leaderboard():
        """Reload the dataset and return both tables unfiltered.

        Note that the current search text is not applied.

        Returns:
            ``(formatted_relaxed, formatted_strict)``; header-only tables if
            loading fails.
        """
        try:
            all_df = load_leaderboard_data()
            formatted_relaxed, formatted_strict, _ = build_leaderboard_state(all_df)
            return formatted_relaxed, formatted_strict
        except Exception as e:
            print(f"β 리λ보λ μλ‘κ³ μΉ¨ μ€ν¨: {e}")
            return empty_tables()

    refresh_btn.click(
        fn=refresh_leaderboard,
        outputs=[relaxed_leaderboard_table, strict_leaderboard_table],
    )

    # Returned so app.py can reuse the tables and the refresh callback
    # (e.g. on initial page load).
    return relaxed_leaderboard_table, strict_leaderboard_table, refresh_leaderboard