Spaces: Running on CPU Upgrade
feat: use dataclass to manage the dataframes
- app.py +60 -48
- src/envs.py +1 -1
app.py CHANGED

@@ -65,40 +65,52 @@ def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
 
-try:
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
-        token=TOKEN
-    )
-except Exception as e:
-    print(f'failed to download')
-    restart_space()
-
-
-raw_data = get_raw_eval_results(EVAL_RESULTS_PATH)
-original_df_qa = get_leaderboard_df(
-    raw_data, task='qa', metric=DEFAULT_METRIC_QA)
-original_df_long_doc = get_leaderboard_df(
-    raw_data, task='long-doc', metric=DEFAULT_METRIC_LONG_DOC)
-print(f'raw data: {len(raw_data)}')
-print(f'QA data loaded: {original_df_qa.shape}')
-print(f'Long-Doc data loaded: {len(original_df_long_doc)}')
-
-leaderboard_df_qa = original_df_qa.copy()
+# try:
+#     snapshot_download(
+#         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
+#         token=TOKEN
+#     )
+# except Exception as e:
+#     print(f'failed to download')
+#     restart_space()
+
+from dataclasses import dataclass
+import pandas as pd
+from typing import Optional
+@dataclass
+class LeaderboardDataStore:
+    raw_data: Optional[list]
+    original_df_qa: Optional[pd.DataFrame]
+    original_df_long_doc: Optional[pd.DataFrame]
+    leaderboard_df_qa: Optional[pd.DataFrame]
+    leaderboard_df_long_doc: Optional[pd.DataFrame]
+    reranking_models: Optional[list]
+
+data = {}
+data["AIR-Bench_24.04"] = LeaderboardDataStore(None, None, None, None, None, None)
+data["AIR-Bench_24.04"].raw_data = get_raw_eval_results(f"{EVAL_RESULTS_PATH}/AIR-Bench_24.04")
+data["AIR-Bench_24.04"].original_df_qa = get_leaderboard_df(
+    data["AIR-Bench_24.04"].raw_data, task='qa', metric=DEFAULT_METRIC_QA)
+data["AIR-Bench_24.04"].original_df_long_doc = get_leaderboard_df(
+    data["AIR-Bench_24.04"].raw_data, task='long-doc', metric=DEFAULT_METRIC_LONG_DOC)
+print(f'raw data: {len(data["AIR-Bench_24.04"].raw_data)}')
+print(f'QA data loaded: {data["AIR-Bench_24.04"].original_df_qa.shape}')
+print(f'Long-Doc data loaded: {len(data["AIR-Bench_24.04"].original_df_long_doc)}')
+
+data["AIR-Bench_24.04"].leaderboard_df_qa = data["AIR-Bench_24.04"].original_df_qa.copy()
 # leaderboard_df_qa = leaderboard_df_qa[has_no_nan_values(df, _benchmark_cols)]
 shown_columns_qa, types_qa = get_default_cols(
-    'qa', leaderboard_df_qa.columns, add_fix_cols=True)
-leaderboard_df_qa = leaderboard_df_qa[~leaderboard_df_qa[COL_NAME_IS_ANONYMOUS]][shown_columns_qa]
-leaderboard_df_qa.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
+    'qa', data["AIR-Bench_24.04"].leaderboard_df_qa.columns, add_fix_cols=True)
+data["AIR-Bench_24.04"].leaderboard_df_qa = data["AIR-Bench_24.04"].leaderboard_df_qa[~data["AIR-Bench_24.04"].leaderboard_df_qa[COL_NAME_IS_ANONYMOUS]][shown_columns_qa]
+data["AIR-Bench_24.04"].leaderboard_df_qa.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
 
-leaderboard_df_long_doc = original_df_long_doc.copy()
+data["AIR-Bench_24.04"].leaderboard_df_long_doc = data["AIR-Bench_24.04"].original_df_long_doc.copy()
 shown_columns_long_doc, types_long_doc = get_default_cols(
-    'long-doc', leaderboard_df_long_doc.columns, add_fix_cols=True)
-leaderboard_df_long_doc = leaderboard_df_long_doc[~leaderboard_df_long_doc[COL_NAME_IS_ANONYMOUS]][shown_columns_long_doc]
-leaderboard_df_long_doc.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
+    'long-doc', data["AIR-Bench_24.04"].leaderboard_df_long_doc.columns, add_fix_cols=True)
+data["AIR-Bench_24.04"].leaderboard_df_long_doc = data["AIR-Bench_24.04"].leaderboard_df_long_doc[~data["AIR-Bench_24.04"].leaderboard_df_long_doc[COL_NAME_IS_ANONYMOUS]][shown_columns_long_doc]
+data["AIR-Bench_24.04"].leaderboard_df_long_doc.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
 
-
-reranking_models = sorted(list(frozenset([eval_result.reranking_model for eval_result in raw_data])))
+data["AIR-Bench_24.04"].reranking_models = sorted(list(frozenset([eval_result.reranking_model for eval_result in data["AIR-Bench_24.04"].raw_data])))
 
 
 def update_metric_qa(
@@ -110,7 +122,7 @@ def update_metric_qa(
     show_anonymous: bool,
     show_revision_and_timestamp,
 ):
-    return update_metric(raw_data, 'qa', metric, domains, langs, reranking_model, query, show_anonymous, show_revision_and_timestamp)
+    return update_metric(data["AIR-Bench_24.04"].raw_data, 'qa', metric, domains, langs, reranking_model, query, show_anonymous, show_revision_and_timestamp)
 
 def update_metric_long_doc(
     metric: str,
@@ -121,7 +133,7 @@ def update_metric_long_doc(
     show_anonymous: bool,
     show_revision_and_timestamp,
 ):
-    return update_metric(raw_data, "long-doc", metric, domains, langs, reranking_model, query, show_anonymous, show_revision_and_timestamp)
+    return update_metric(data["AIR-Bench_24.04"].raw_data, "long-doc", metric, domains, langs, reranking_model, query, show_anonymous, show_revision_and_timestamp)
 
 
 demo = gr.Blocks(css=custom_css)
@@ -160,10 +172,10 @@ with demo:
                     search_bar = get_search_bar()
                 # select reranking models
                 with gr.Column():
-                    selected_rerankings = get_reranking_dropdown(reranking_models)
-            leaderboard_table = get_leaderboard_table(leaderboard_df_qa, types_qa)
+                    selected_rerankings = get_reranking_dropdown(data["AIR-Bench_24.04"].reranking_models)
+            leaderboard_table = get_leaderboard_table(data["AIR-Bench_24.04"].leaderboard_df_qa, types_qa)
             # Dummy leaderboard for handling the case when the user uses backspace key
-            hidden_leaderboard_table_for_search = get_leaderboard_table(original_df_qa, types_qa, visible=False)
+            hidden_leaderboard_table_for_search = get_leaderboard_table(data["AIR-Bench_24.04"].original_df_qa, types_qa, visible=False)
 
             set_listeners(
                 "qa",
@@ -198,11 +210,11 @@ with demo:
                        search_bar_retriever = get_search_bar()
                    with gr.Column(scale=1):
                        selected_noreranker = get_noreranking_dropdown()
-               lb_df_retriever = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
+               lb_df_retriever = data["AIR-Bench_24.04"].leaderboard_df_qa[data["AIR-Bench_24.04"].leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
                lb_df_retriever = reset_rank(lb_df_retriever)
                lb_table_retriever = get_leaderboard_table(lb_df_retriever, types_qa)
                # Dummy leaderboard for handling the case when the user uses backspace key
-               hidden_lb_df_retriever = original_df_qa[original_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
+               hidden_lb_df_retriever = data["AIR-Bench_24.04"].original_df_qa[data["AIR-Bench_24.04"].original_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
                hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
                hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever, types_qa, visible=False)
 
@@ -234,7 +246,7 @@ with demo:
                    queue=True
                )
            with gr.TabItem("Reranking Only", id=12):
-               lb_df_reranker = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
+               lb_df_reranker = data["AIR-Bench_24.04"].leaderboard_df_qa[data["AIR-Bench_24.04"].leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
                lb_df_reranker = reset_rank(lb_df_reranker)
                reranking_models_reranker = lb_df_reranker[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
                with gr.Row():
@@ -243,7 +255,7 @@ with demo:
                    with gr.Column(scale=1):
                        search_bar_reranker = gr.Textbox(show_label=False, visible=False)
                lb_table_reranker = get_leaderboard_table(lb_df_reranker, types_qa)
-               hidden_lb_df_reranker = original_df_qa[original_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
+               hidden_lb_df_reranker = data["AIR-Bench_24.04"].original_df_qa[data["AIR-Bench_24.04"].original_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
                hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
                hidden_lb_table_reranker = get_leaderboard_table(
                    hidden_lb_df_reranker, types_qa, visible=False
@@ -301,15 +313,15 @@ with demo:
                    search_bar = get_search_bar()
                # select reranking model
                with gr.Column():
-                   selected_rerankings = get_reranking_dropdown(reranking_models)
+                   selected_rerankings = get_reranking_dropdown(data["AIR-Bench_24.04"].reranking_models)
 
            lb_table = get_leaderboard_table(
-               leaderboard_df_long_doc, types_long_doc
+               data["AIR-Bench_24.04"].leaderboard_df_long_doc, types_long_doc
            )
 
            # Dummy leaderboard for handling the case when the user uses backspace key
            hidden_lb_table_for_search = get_leaderboard_table(
-               original_df_long_doc, types_long_doc, visible=False
+               data["AIR-Bench_24.04"].original_df_long_doc, types_long_doc, visible=False
            )
 
            set_listeners(
@@ -345,12 +357,12 @@ with demo:
                       search_bar_retriever = get_search_bar()
                   with gr.Column(scale=1):
                       selected_noreranker = get_noreranking_dropdown()
-              lb_df_retriever_long_doc = leaderboard_df_long_doc[
-                  leaderboard_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
+              lb_df_retriever_long_doc = data["AIR-Bench_24.04"].leaderboard_df_long_doc[
+                  data["AIR-Bench_24.04"].leaderboard_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
               ]
              lb_df_retriever_long_doc = reset_rank(lb_df_retriever_long_doc)
-             hidden_lb_db_retriever_long_doc = original_df_long_doc[
-                 original_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
+             hidden_lb_db_retriever_long_doc = data["AIR-Bench_24.04"].original_df_long_doc[
+                 data["AIR-Bench_24.04"].original_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
              ]
             hidden_lb_db_retriever_long_doc = reset_rank(hidden_lb_db_retriever_long_doc)
             lb_table_retriever_long_doc = get_leaderboard_table(
@@ -386,8 +398,8 @@ with demo:
                    queue=True
                )
            with gr.TabItem("Reranking Only", id=22):
-               lb_df_reranker_ldoc = leaderboard_df_long_doc[
-                   leaderboard_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
+               lb_df_reranker_ldoc = data["AIR-Bench_24.04"].leaderboard_df_long_doc[
+                   data["AIR-Bench_24.04"].leaderboard_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
                ]
                lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
                reranking_models_reranker_ldoc = lb_df_reranker_ldoc[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
@@ -397,7 +409,7 @@ with demo:
                    with gr.Column(scale=1):
                        search_bar_reranker_ldoc = gr.Textbox(show_label=False, visible=False)
                lb_table_reranker_ldoc = get_leaderboard_table(lb_df_reranker_ldoc, types_long_doc)
-               hidden_lb_df_reranker_ldoc = original_df_long_doc[original_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
+               hidden_lb_df_reranker_ldoc = data["AIR-Bench_24.04"].original_df_long_doc[data["AIR-Bench_24.04"].original_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
                hidden_lb_df_reranker_ldoc = reset_rank(hidden_lb_df_reranker_ldoc)
                hidden_lb_table_reranker_ldoc = get_leaderboard_table(
                    hidden_lb_df_reranker_ldoc, types_long_doc, visible=False
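
The core of this change is the LeaderboardDataStore dataclass: instead of module-level globals (raw_data, original_df_qa, ...), each benchmark version gets one store object kept in the data dict. Below is a minimal, self-contained sketch of the pattern; load_version and its toy rows are illustrative stand-ins, not the app's real get_raw_eval_results / get_leaderboard_df loaders.

from dataclasses import dataclass
from typing import Optional

import pandas as pd


@dataclass
class LeaderboardDataStore:
    # Defaults avoid the positional LeaderboardDataStore(None, None, None, None, None, None)
    # call used in the diff.
    raw_data: Optional[list] = None
    original_df_qa: Optional[pd.DataFrame] = None
    leaderboard_df_qa: Optional[pd.DataFrame] = None
    reranking_models: Optional[list] = None


def load_version(version: str) -> LeaderboardDataStore:
    # Illustrative stand-in for the app's loading pipeline: fill one store
    # per benchmark version and derive the display dataframe from the raw one.
    store = LeaderboardDataStore()
    store.raw_data = [
        {"reranking_model": "NoReranker", "score": 0.41},
        {"reranking_model": "some-reranker", "score": 0.57},
    ]
    store.original_df_qa = pd.DataFrame(store.raw_data)
    store.leaderboard_df_qa = store.original_df_qa.copy()
    store.reranking_models = sorted({row["reranking_model"] for row in store.raw_data})
    return store


# One store per benchmark version, keyed the same way as the diff's `data` dict.
data = {"AIR-Bench_24.04": load_version("AIR-Bench_24.04")}
print(data["AIR-Bench_24.04"].reranking_models)

Giving the fields defaults, as in this sketch, would let the diff drop its six-argument constructor call, and binding the store once (e.g. a local like version_data = data["AIR-Bench_24.04"]) would shorten the long repeated-subscript lines that dominate the diff.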
src/envs.py CHANGED

@@ -27,7 +27,7 @@ BM25_LINK = model_hyperlink("https://github.com/castorini/pyserini", "BM25")
 
 BENCHMARK_VERSION_LIST = [
     "AIR-Bench_24.04",
-    "AIR-Bench_24.05",
+    # "AIR-Bench_24.05",
 ]
 
 LATEST_BENCHMARK_VERSION = BENCHMARK_VERSION_LIST[-1]
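
Because LATEST_BENCHMARK_VERSION is taken from the tail of the list, commenting out "AIR-Bench_24.05" also changes which version the app treats as latest. A quick sketch of the resulting behavior:

BENCHMARK_VERSION_LIST = [
    "AIR-Bench_24.04",
    # "AIR-Bench_24.05",
]

# With 24.05 commented out, the "latest" version falls back to 24.04.
LATEST_BENCHMARK_VERSION = BENCHMARK_VERSION_LIST[-1]
assert LATEST_BENCHMARK_VERSION == "AIR-Bench_24.04"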