File size: 11,407 Bytes
5e8f045
 
 
 
 
 
 
 
4a43fed
5e8f045
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a43fed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e8f045
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a43fed
5e8f045
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a43fed
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
"""
λ¦¬λ”λ³΄λ“œ νƒ­ UI μ»΄ν¬λ„ŒνŠΈ

πŸ† Leaderboard νƒ­μ˜ UI와 λ‘œμ§μ„ κ΄€λ¦¬ν•©λ‹ˆλ‹€.
"""

import gradio as gr
import pandas as pd
from src.leaderboard_manager import load_leaderboard_data


def create_leaderboard_tab():
    """λ¦¬λ”λ³΄λ“œ νƒ­ UI 생성"""
    
    # Unified search bar at the very top (improved design)
    with gr.Row():
        with gr.Column(scale=12):
            search_input = gr.Textbox(
                label="제좜자 이름 검색",
                placeholder="πŸ” 제좜자 μ΄λ¦„μœΌλ‘œ 검색...",
                value="",
                container=False,
                elem_classes=["search-input"]
            )
        with gr.Column(scale=1, min_width=100):
            clear_search_btn = gr.Button(
                "πŸ—‘οΈ μ΄ˆκΈ°ν™”", 
                variant="secondary", 
                size="sm",
                elem_classes=["clear-search-btn"]
            )
        with gr.Column(scale=1, min_width=100):
            refresh_btn = gr.Button(
                "πŸ”„ μƒˆλ‘œκ³ μΉ¨", 
                variant="primary",
                size="sm",
                elem_classes=["refresh-btn"]
            )

    # Columns exposed on the leaderboard and their display names
    DISPLAY_COLUMNS = [
        'rank',
        'id',
        'model',
        'description',
        'accuracy',
        'fast_changing_accuracy',
        'slow_changing_accuracy',
        'never_changing_accuracy',
        'acc_vp',
        'acc_fp',
        'acc_vp_one_hop',
        'acc_vp_two_hop',
        'acc_fp_one_hop',
        'acc_fp_two_hop',
        'acc_politics',
        'acc_sports',
        'acc_entertainment',
        'acc_weather',
        'acc_world',
        'acc_economy',
        'acc_society',
        'acc_it_science',
        'acc_life_culture',
        'acc_unknown'
    ]
    
    COLUMN_LABELS = {
        'rank': 'Rank',
        'id': 'ID',
        'model': 'Model',
        'description': 'Description',
        'accuracy': 'Accuracy',
        'fast_changing_accuracy': 'Fast-changing',
        'slow_changing_accuracy': 'Slow-changing',
        'never_changing_accuracy': 'Never-changing',
        'acc_vp': 'Valid Premise',
        'acc_fp': 'False Premise',
        'acc_vp_one_hop': 'VP One-hop',
        'acc_vp_two_hop': 'VP Multi-hop',
        'acc_fp_one_hop': 'FP One-hop',
        'acc_fp_two_hop': 'FP Multi-hop',
        'acc_politics': 'Politics',
        'acc_sports': 'Sports',
        'acc_entertainment': 'Entertainment',
        'acc_weather': 'Weather',
        'acc_world': 'World',
        'acc_economy': 'Economy',
        'acc_society': 'Society',
        'acc_it_science': 'IT/Science',
        'acc_life_culture': 'Life/Culture',
        'acc_unknown': 'Unknown'
    }

    def prepare_display_data(df: pd.DataFrame, global_ranking=None) -> pd.DataFrame:
        """Return a display-ready copy of the leaderboard rows.

        Fills in placeholder text for missing model/description values, adds a
        ``rank`` column when an ``accuracy`` column exists, and rounds the known
        score columns to two decimals (for display only).

        Args:
            df: Leaderboard rows. ``None`` yields a new empty DataFrame; an empty
                frame is handed back unchanged.
            global_ranking: Optional argument forwarded to ``Index.map`` to look
                up each row's rank from its index label. When ``None``, rows are
                sorted by ``accuracy`` in descending order, re-indexed from 0,
                and ranked by position (medal strings for the first three rows,
                the position as a string afterwards).

        Returns:
            The processed copy of ``df``; the input frame is not modified.
        """
        # Nothing to process: mirror the input (or give the caller an empty frame).
        if df is None:
            return pd.DataFrame()
        if df.empty:
            return df

        table = df.copy()

        # Placeholder texts for missing model / description values.
        if "model" in table.columns:
            named = table["model"].fillna("Anonymous Model")
            table["model"] = named.replace("", "Anonymous Model")
        if "description" in table.columns:
            without_null_objects = table["description"].replace({None: "", pd.NA: ""})
            table["description"] = without_null_objects.fillna("")

        # Rank column: either looked up from the caller-supplied ranking or
        # derived from the position after sorting by accuracy.
        if "accuracy" in table.columns:
            if global_ranking is None:
                table = table.sort_values("accuracy", ascending=False).reset_index(drop=True)
                medals = {1: "πŸ₯‡", 2: "πŸ₯ˆ", 3: "πŸ₯‰"}
                table["rank"] = [
                    medals.get(place, str(place)) for place in range(1, len(table) + 1)
                ]
            else:
                table["rank"] = table.index.map(global_ranking)

        # Score columns rounded to two decimals; columns absent from the frame
        # are skipped.
        score_columns = (
            "accuracy",
            "fast_changing_accuracy",
            "slow_changing_accuracy",
            "never_changing_accuracy",
            "acc_vp",
            "acc_fp",
            "acc_vp_one_hop",
            "acc_vp_two_hop",
            "acc_fp_one_hop",
            "acc_fp_two_hop",
            "acc_vp_old",
            "acc_vp_new",
            "acc_fp_old",
            "acc_fp_new",
            "acc_politics",
            "acc_sports",
            "acc_entertainment",
            "acc_weather",
            "acc_world",
            "acc_economy",
            "acc_society",
            "acc_it_science",
            "acc_life_culture",
            "acc_unknown",
        )
        for name in score_columns:
            if name not in table.columns:
                continue
            table[name] = table[name].round(2)

        return table

    
    def format_leaderboard(df: pd.DataFrame) -> pd.DataFrame:
        """Select the columns shown on the leaderboard and apply display headers.

        Args:
            df: Prepared leaderboard rows.

        Returns:
            A new DataFrame restricted to the ``DISPLAY_COLUMNS`` present in
            ``df`` (in ``DISPLAY_COLUMNS`` order) with headers renamed through
            ``COLUMN_LABELS``. For an empty ``df`` the result is a zero-row frame
            carrying every display column, so the table keeps its header
            structure even without data.
        """
        if df.empty:
            # No data: still expose the full header row.
            visible = list(DISPLAY_COLUMNS)
            base = pd.DataFrame(columns=visible)
        else:
            visible = [name for name in DISPLAY_COLUMNS if name in df.columns]
            base = df[visible].copy()

        headers = {name: COLUMN_LABELS[name] for name in visible if name in COLUMN_LABELS}
        return base.rename(columns=headers)
    
    def build_leaderboard_state(source_df: pd.DataFrame):
        """Split the leaderboard by evaluation mode and format both halves.

        Args:
            source_df: All leaderboard rows, or ``None``. Rows are split on the
                ``evaluation_mode`` column (values ``'Relaxed'`` / ``'Strict'``).

        Returns:
            A ``(relaxed, strict, is_empty)`` tuple: the two formatted display
            frames and a flag that is True when neither mode has any rows.
            When the input is ``None``, empty, or lacks ``evaluation_mode``,
            both modes are treated as empty.
        """
        frame = pd.DataFrame() if source_df is None else source_df

        can_split = not frame.empty and 'evaluation_mode' in frame.columns
        if can_split:
            relaxed_rows = frame.query("evaluation_mode == 'Relaxed'")
            strict_rows = frame.query("evaluation_mode == 'Strict'")
        else:
            relaxed_rows = pd.DataFrame()
            strict_rows = pd.DataFrame()

        relaxed_view = format_leaderboard(prepare_display_data(relaxed_rows))
        strict_view = format_leaderboard(prepare_display_data(strict_rows))
        nothing_to_show = relaxed_rows.empty and strict_rows.empty
        return relaxed_view, strict_view, nothing_to_show

    # ✅ Initial values (as of app build time)
    leaderboard_data = load_leaderboard_data()
    relaxed_initial, strict_initial, is_initial_empty = build_leaderboard_state(leaderboard_data)

    # Relaxed-mode leaderboard
    with gr.Column(elem_classes=["leaderboard-group"]):
        gr.Markdown(
            "### 🟒 Relaxed Evaluation"
        )

        relaxed_leaderboard_table = gr.DataFrame(
            value=relaxed_initial,
            interactive=False,
            wrap=False,
            show_label=False,
            elem_classes=["leaderboard-table"]
        )
    
    # Strict-mode leaderboard
    with gr.Column(elem_classes=["leaderboard-group"]):
        gr.Markdown(
            "### πŸ”΄ Strict Evaluation"
        )
        
        strict_leaderboard_table = gr.DataFrame(
            value=strict_initial,
            interactive=False,
            wrap=False,
            show_label=False,
            elem_classes=["leaderboard-table"]
        )
    
    # Explanatory notes about the leaderboard
    with gr.Column(elem_classes=["leaderboard-group"]):
        gr.Markdown("""
            이 λ¦¬λ”λ³΄λ“œλŠ” [FreshQA](https://github.com/freshllms/freshqa)μ—μ„œ μ˜κ°μ„ λ°›μ•„ λ§Œλ“€μ–΄μ‘ŒμŠ΅λ‹ˆλ‹€.  
            fact type(fast changing, slow changing, never changing), μ „μ œμ˜ μ§„μ‹€μ„±,  
            10개의 도메인에 따라 λ‚˜λ‰˜λŠ” μ§ˆλ¬Έλ“€μ„ 톡해 ν•œκ΅­μ–΄ 지식과 κ΄€λ ¨λœ LLM의 μ΅œμ‹ μ„±μ„ νŒλ‹¨ν•  수 μžˆμŠ΅λ‹ˆλ‹€.

            이 λ¦¬λ”λ³΄λ“œλŠ” IITP의 **β€œμƒμ„±ν˜• μ–Έμ–΄λͺ¨λΈμ˜ 지속가λŠ₯μ„±κ³Ό μ‹œκ°„μ˜ 흐름에 λ”°λ₯Έ μ΅œμ‹ μ„± λ°˜μ˜μ„ μœ„ν•œ ν•™μŠ΅ 및 ν™œμš© 기술 κ°œλ°œβ€** μ‚¬μ—…μ˜ 지원을 λ°›μ•„ μ œμž‘λ˜μ—ˆμŠ΅λ‹ˆλ‹€.

            결과의 λ¬΄κ²°μ„±Β·μœ νš¨μ„±μ„ μœ μ§€ν•˜κ³  **μˆœμœ„ μ‘°μž‘μ„ λ°©μ§€**ν•˜κΈ° μœ„ν•΄ 평가 λ°μ΄ν„°μ…‹μ˜ 정닡은 κΈ°λ°€λ‘œ μœ μ§€λ©λ‹ˆλ‹€.
        """)


    # Unified search filter function (filters both the Relaxed and Strict modes)
    def filter_leaderboard_data(search_text):
        """Reload the leaderboard and filter both mode tables by submitter id.

        The search text is matched as a literal, case-insensitive substring of
        the ``id`` column. Missing ids never match.

        Args:
            search_text: Raw contents of the search box. ``None``, an empty
                string, or whitespace-only text means "no filter". Surrounding
                whitespace is ignored.

        Returns:
            A ``(relaxed, strict)`` tuple of formatted DataFrames. If loading or
            filtering fails, the error is printed and two empty DataFrames are
            returned instead of raising.

        NOTE(review): ranks are recomputed over the filtered rows, because
        ``build_leaderboard_state`` is called without a global ranking. Confirm
        that this is the intended display.
        """
        try:
            # Reload the full data set on every call (the original comments
            # describe the backing store as a CSV).
            all_df = load_leaderboard_data()

            # Normalise once so the "is there a query?" check and the match
            # itself operate on the same text; also tolerates a None value.
            query = (search_text or "").strip()

            # Search only the submitter id column.
            if query and 'id' in all_df.columns:
                # regex=False: the text comes straight from the user, so it must
                # not be compiled as a pattern. Otherwise input such as "(" or
                # "[" raises re.error and metacharacters like "." over-match.
                mask = all_df['id'].str.contains(query, case=False, na=False, regex=False)
                filtered_df = all_df[mask]
            else:
                filtered_df = all_df

            formatted_relaxed, formatted_strict, _ = build_leaderboard_state(filtered_df)
            return formatted_relaxed, formatted_strict
        except Exception as e:
            # UI callback boundary: report the failure and show empty tables
            # rather than propagating the exception.
            print(f"❌ λ¦¬λ”λ³΄λ“œ 데이터 필터링 μ‹€νŒ¨: {e}")
            empty = pd.DataFrame()
            return empty, empty
    
    # Wire up the search event
    search_input.change(
        fn=filter_leaderboard_data,
        inputs=[search_input],
        outputs=[relaxed_leaderboard_table, strict_leaderboard_table]
    )
    
    # Search reset button
    def clear_search():
        """Reset the search box and reload both leaderboard tables unfiltered.

        Returns:
            A ``(search_text, relaxed, strict)`` tuple: an empty string for the
            search box followed by the two formatted DataFrames. If loading
            fails, the error is printed and both frames are empty DataFrames.
        """
        try:
            relaxed_view, strict_view, _ = build_leaderboard_state(load_leaderboard_data())
        except Exception as e:
            print(f"❌ λ¦¬λ”λ³΄λ“œ 데이터 λ‘œλ“œ μ‹€νŒ¨: {e}")
            blank = pd.DataFrame()
            return "", blank, blank
        else:
            return "", relaxed_view, strict_view
    
    clear_search_btn.click(
        fn=clear_search,
        outputs=[search_input, relaxed_leaderboard_table, strict_leaderboard_table]
    )
    
    # Refresh button
    def refresh_leaderboard():
        """Reload the leaderboard data and rebuild both mode tables.

        Returns:
            A ``(relaxed, strict)`` tuple of formatted DataFrames. If loading
            fails, the error is printed and two empty DataFrames are returned.
        """
        try:
            latest = load_leaderboard_data()
            # The third element (empty-state flag) is not needed here.
            relaxed_view, strict_view, _ = build_leaderboard_state(latest)
        except Exception as e:
            print(f"❌ λ¦¬λ”λ³΄λ“œ μƒˆλ‘œκ³ μΉ¨ μ‹€νŒ¨: {e}")
            blank = pd.DataFrame()
            return blank, blank
        else:
            return relaxed_view, strict_view
    
    refresh_btn.click(
        fn=refresh_leaderboard,
        outputs=[relaxed_leaderboard_table, strict_leaderboard_table]
    )

    # ✅ Returned so that app.py can reuse these on the initial load as well
    return relaxed_leaderboard_table, strict_leaderboard_table, refresh_leaderboard