Files changed:
- app.py +3 -0
- src/leaderboard/read_evals.py +12 -10
app.py CHANGED

@@ -63,6 +63,8 @@ leaderboard_df = original_df.copy()
 leaderboard_df = leaderboard_df[leaderboard_df[AutoEvalColumn.still_on_hub.name] == True]
 # leaderboard_df = leaderboard_df[('speakleash' not in leaderboard_df['model_name_for_query']) | ('Bielik' in leaderboard_df['model_name_for_query'])]
 
+original_df.to_csv("output.csv")
+
 (
     finished_eval_queue_df,
     running_eval_queue_df,
@@ -400,6 +402,7 @@ with demo:
     elem_id="citation-button",
     show_copy_button=True,
 )
+csv = gr.File(interactive=False, value="output.csv")
 
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
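The three added lines write the unfiltered leaderboard dataframe to output.csv at startup and expose it through a read-only gr.File component, so the raw data can be downloaded from the Space page. A minimal sketch of that pattern follows; the placeholder dataframe and the gr.Dataframe call are illustrative, not taken from the Space's real app.py.

# Sketch: export the dataframe once at startup and offer it for download.
import gradio as gr
import pandas as pd

# placeholder data standing in for the leaderboard's original_df
original_df = pd.DataFrame({"model": ["org/model-a", "org/model-b"], "score": [61.2, 57.8]})
original_df.to_csv("output.csv")  # dump the full, unfiltered table to disk

with gr.Blocks() as demo:
    gr.Dataframe(value=original_df)                        # table shown on the page
    csv = gr.File(interactive=False, value="output.csv")   # read-only download of the raw CSV

demo.launch()

Since the export runs at module import, the surrounding scheduler context (restart every 1800 seconds) means the CSV is regenerated on each Space restart.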
src/leaderboard/read_evals.py CHANGED

@@ -433,16 +433,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
 for k,v in eval_results.items():
     v.results = {k: v for k, (v, start_date) in v.results.items()}
 
-
-for v in eval_results.values():
-    try:
-        print(v)
-        v.to_dict() # we test if the dict version is complete
-        # if v.results:
-        results.append(v)
-    except KeyError: # not all eval values present
-        print(f"not all eval values present {v.eval_name} {v.full_model}")
-        continue
+
 
 all_models = []
 missing_results_for_task = {}
@@ -473,6 +464,17 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
 missing_metadata.append(f"{v.full_model}")
 all_models.append((v.full_model, v.num_params, v.still_on_hub))
 
+results = []
+for v in eval_results.values():
+    try:
+        print(v)
+        v.to_dict() # we test if the dict version is complete
+        # if v.results:
+        results.append(v)
+    except KeyError: # not all eval values present
+        print(f"not all eval values present {v.eval_name} {v.full_model}")
+        continue
+
 print(f"Missing sbatch results:")
 for r in for_run:
     if r[0]==5 and r[1] in ['polish_eq_bench']: continue