Spaces: Running on CPU Upgrade
feat: add is_anonymous field
Browse files
- app.py +2 -2
- src/display/utils.py +5 -0
- src/read_evals.py +6 -2
- src/utils.py +3 -3
app.py
CHANGED
@@ -315,7 +315,7 @@ with demo:
|
|
315 |
with gr.Row():
|
316 |
file_output = gr.File()
|
317 |
with gr.Row():
|
318 |
-
|
319 |
label="Nope. I want to submit anonymously 🥷",
|
320 |
value=False,
|
321 |
info="Do you want to shown on the leaderboard by default?")
|
@@ -336,7 +336,7 @@ with demo:
|
|
336 |
model_name,
|
337 |
model_url,
|
338 |
benchmark_version,
|
339 |
-
|
340 |
],
|
341 |
submission_result,
|
342 |
show_progress="hidden"
|
|
|
315 |
with gr.Row():
|
316 |
file_output = gr.File()
|
317 |
with gr.Row():
|
318 |
+
is_anonymous = gr.Checkbox(
|
319 |
label="Nope. I want to submit anonymously 🥷",
|
320 |
value=False,
|
321 |
info="Do you want to shown on the leaderboard by default?")
|
|
|
336 |
model_name,
|
337 |
model_url,
|
338 |
benchmark_version,
|
339 |
+
is_anonymous
|
340 |
],
|
341 |
submission_result,
|
342 |
show_progress="hidden"
|
src/display/utils.py
CHANGED
@@ -27,6 +27,7 @@ COL_NAME_RERANKING_MODEL_LINK = "Reranking Model LINK"
|
|
27 |
COL_NAME_RANK = "Rank 🏆"
|
28 |
COL_NAME_REVISION = "Revision"
|
29 |
COL_NAME_TIMESTAMP = "Submission Date"
|
|
|
30 |
|
31 |
|
32 |
def get_default_auto_eval_column_dict():
|
@@ -56,8 +57,12 @@ def get_default_auto_eval_column_dict():
|
|
56 |
auto_eval_column_dict.append(
|
57 |
["reranking_model_link", ColumnContent, ColumnContent(COL_NAME_RERANKING_MODEL, "markdown", False, hidden=True, never_hidden=False)]
|
58 |
)
|
|
|
|
|
|
|
59 |
return auto_eval_column_dict
|
60 |
|
|
|
61 |
def make_autoevalcolumn(cls_name="BenchmarksQA", benchmarks=BenchmarksQA):
|
62 |
auto_eval_column_dict = get_default_auto_eval_column_dict()
|
63 |
## Leaderboard columns
|
|
|
27 |
COL_NAME_RANK = "Rank 🏆"
|
28 |
COL_NAME_REVISION = "Revision"
|
29 |
COL_NAME_TIMESTAMP = "Submission Date"
|
30 |
+
COL_NAME_IS_ANONYMOUS = "Anonymous Submission"
|
31 |
|
32 |
|
33 |
def get_default_auto_eval_column_dict():
|
|
|
57 |
auto_eval_column_dict.append(
|
58 |
["reranking_model_link", ColumnContent, ColumnContent(COL_NAME_RERANKING_MODEL, "markdown", False, hidden=True, never_hidden=False)]
|
59 |
)
|
60 |
+
auto_eval_column_dict.append(
|
61 |
+
["is_anonymous", ColumnContent, ColumnContent(COL_NAME_IS_ANONYMOUS, "bool", False, hidden=True)]
|
62 |
+
)
|
63 |
return auto_eval_column_dict
|
64 |
|
65 |
+
|
66 |
def make_autoevalcolumn(cls_name="BenchmarksQA", benchmarks=BenchmarksQA):
|
67 |
auto_eval_column_dict = get_default_auto_eval_column_dict()
|
68 |
## Leaderboard columns
|
src/read_evals.py
CHANGED
@@ -40,6 +40,7 @@ class EvalResult:
|
|
40 |
metric: str
|
41 |
timestamp: str = "" # submission timestamp
|
42 |
revision: str = ""
|
|
|
43 |
|
44 |
|
45 |
@dataclass
|
@@ -55,6 +56,7 @@ class FullEvalResult:
|
|
55 |
results: List[EvalResult] # results on all the EvalResults over different tasks and metrics.
|
56 |
timestamp: str = ""
|
57 |
revision: str = ""
|
|
|
58 |
|
59 |
@classmethod
|
60 |
def init_from_json_file(cls, json_filepath):
|
@@ -87,7 +89,8 @@ class FullEvalResult:
|
|
87 |
task=config["task"],
|
88 |
metric=config["metric"],
|
89 |
timestamp=config.get("timestamp", "2024-05-12T12:24:02Z"),
|
90 |
-
revision=config.get("revision", "3a2ba9dcad796a48a02ca1147557724e")
|
|
|
91 |
)
|
92 |
result_list.append(eval_result)
|
93 |
return cls(
|
@@ -98,7 +101,8 @@ class FullEvalResult:
|
|
98 |
reranking_model_link=reranking_model_link,
|
99 |
results=result_list,
|
100 |
timestamp=result_list[0].timestamp,
|
101 |
-
revision=result_list[0].revision
|
|
|
102 |
)
|
103 |
|
104 |
def to_dict(self, task='qa', metric='ndcg_at_3') -> List:
|
|
|
40 |
metric: str
|
41 |
timestamp: str = "" # submission timestamp
|
42 |
revision: str = ""
|
43 |
+
is_anonymous: bool = False
|
44 |
|
45 |
|
46 |
@dataclass
|
|
|
56 |
results: List[EvalResult] # results on all the EvalResults over different tasks and metrics.
|
57 |
timestamp: str = ""
|
58 |
revision: str = ""
|
59 |
+
is_anonymous: bool = False
|
60 |
|
61 |
@classmethod
|
62 |
def init_from_json_file(cls, json_filepath):
|
|
|
89 |
task=config["task"],
|
90 |
metric=config["metric"],
|
91 |
timestamp=config.get("timestamp", "2024-05-12T12:24:02Z"),
|
92 |
+
revision=config.get("revision", "3a2ba9dcad796a48a02ca1147557724e"),
|
93 |
+
is_anonymous=config.get("is_anonymous", False)
|
94 |
)
|
95 |
result_list.append(eval_result)
|
96 |
return cls(
|
|
|
101 |
reranking_model_link=reranking_model_link,
|
102 |
results=result_list,
|
103 |
timestamp=result_list[0].timestamp,
|
104 |
+
revision=result_list[0].revision,
|
105 |
+
is_anonymous=result_list[0].is_anonymous
|
106 |
)
|
107 |
|
108 |
def to_dict(self, task='qa', metric='ndcg_at_3') -> List:
|
src/utils.py
CHANGED
@@ -59,7 +59,7 @@ def get_default_cols(task: str, columns: list = [], add_fix_cols: bool = True) -
|
|
59 |
for col_name, col_type in zip(cols_list, types_list):
|
60 |
if col_name not in benchmark_list:
|
61 |
continue
|
62 |
-
if columns and col_name not in columns:
|
63 |
continue
|
64 |
cols.append(col_name)
|
65 |
types.append(col_type)
|
@@ -178,7 +178,7 @@ def get_iso_format_timestamp():
|
|
178 |
return iso_format_timestamp, filename_friendly_timestamp
|
179 |
|
180 |
|
181 |
-
def submit_results(filepath: str, model: str, model_url: str, version: str = "AIR-Bench_24.04",
|
182 |
if not filepath.endswith(".zip"):
|
183 |
return styled_error(f"file uploading aborted. wrong file type: {filepath}")
|
184 |
|
@@ -218,7 +218,7 @@ def submit_results(filepath: str, model: str, model_url: str, version: str = "AI
|
|
218 |
"model_name": f"{model}",
|
219 |
"model_url": f"{model_url}",
|
220 |
"version": f"{version}",
|
221 |
-
"
|
222 |
"revision": f"{revision}",
|
223 |
"timestamp": f"{timestamp_config}"
|
224 |
}
|
|
|
59 |
for col_name, col_type in zip(cols_list, types_list):
|
60 |
if col_name not in benchmark_list:
|
61 |
continue
|
62 |
+
if len(columns) > 0 and col_name not in columns:
|
63 |
continue
|
64 |
cols.append(col_name)
|
65 |
types.append(col_type)
|
|
|
178 |
return iso_format_timestamp, filename_friendly_timestamp
|
179 |
|
180 |
|
181 |
+
def submit_results(filepath: str, model: str, model_url: str, version: str = "AIR-Bench_24.04", is_anonymous=False):
|
182 |
if not filepath.endswith(".zip"):
|
183 |
return styled_error(f"file uploading aborted. wrong file type: {filepath}")
|
184 |
|
|
|
218 |
"model_name": f"{model}",
|
219 |
"model_url": f"{model_url}",
|
220 |
"version": f"{version}",
|
221 |
+
"is_anonymous": f"{is_anonymous}",
|
222 |
"revision": f"{revision}",
|
223 |
"timestamp": f"{timestamp_config}"
|
224 |
}
|