Commit 4f25de8
Parent(s): 1bf4289
updates

Files changed:
- app.py +112 -0
- metric.py +125 -0
- process_data.py +140 -0
- requirements.txt +2 -0
- run.sh +1 -0
- task1.csv +10 -0
- task2.csv +5 -0
- update_data.sh +3 -0
app.py
ADDED
@@ -0,0 +1,112 @@
import streamlit as st
from pathlib import Path
import pandas as pd
import json
import metric
from sklearn.metrics import roc_auc_score, roc_curve
import numpy as np
import altair as alt


st.set_page_config(
    page_title="Public Leaderboard",
    initial_sidebar_state="collapsed",
    layout="wide",  # use the full width of the screen
)


@st.cache_data
def load_results(task):
    return pd.read_csv(task).set_index("team")


split = "public"


def show_leaderboard(results):
    cols = [
        "generated_accuracy",
        "pristine_accuracy",
        "balanced_accuracy",
        "fail_rate",
        "total_time",
    ]

    column_config = {
        "balanced_accuracy": st.column_config.ProgressColumn(
            "Balanced Acc", format="compact", min_value=0, max_value=1.0, pinned=True, width="large"
        ),
        "generated_accuracy": st.column_config.ProgressColumn(
            "🤖 Acc", format="compact", min_value=0, max_value=1.0, pinned=True, width="large"
        ),
        "pristine_accuracy": st.column_config.ProgressColumn(
            "🧑‍🎤 Acc", format="compact", min_value=0, max_value=1.0, pinned=True, width="large"
        ),
        "fail_rate": st.column_config.NumberColumn(
            "❌ Fail Rate",
            format="compact",
            width="small",
        ),
        "total_time": st.column_config.NumberColumn(
            "🕒 Inference Time",
            format="compact",
            width="small",
        ),
    }

    labels = {"pristine": "🧑‍🎤", "generated": "🤖"}

    for c in results[f"{split}_score"].columns:
        if "accuracy" in c:
            continue
        if any(p in c for p in ["generated", "pristine"]):
            s = c.split("_")
            pred = s[0]
            source = " ".join(s[1:])
            column_config[c] = st.column_config.ProgressColumn(
                labels[pred] + " " + source,
                help=c,
                format="compact",
                min_value=0,
                max_value=1.0,
            )

    "#### Summary"
    st.dataframe(results[f"{split}_score"].loc[:, cols], column_config=column_config)

    "#### Accuracy on 🤖 Generated by Source"

    cols = [
        c
        for c in results[f"{split}_score"].columns
        if "generated" in c and "accuracy" not in c
    ]
    st.dataframe(results[f"{split}_score"].loc[:, cols], column_config=column_config)

    "#### Accuracy on 🧑‍🎤 Pristine by Source"

    cols = [
        c
        for c in results[f"{split}_score"].columns
        if "pristine" in c and "accuracy" not in c
    ]
    st.dataframe(results[f"{split}_score"].loc[:, cols], column_config=column_config)


st.markdown("#### Detailed Public Leaderboard")
st.markdown("[SAFE: Synthetic Audio Forensics Evaluation Challenge](https://stresearch.github.io/SAFE/)")

t1, t2 = st.tabs(["**Task 1**", "**Task 2**"])
with t1:
    results1 = {f"{split}_score": load_results("task1.csv")}
    show_leaderboard(results1)

with t2:
    results2 = {f"{split}_score": load_results("task2.csv")}
    show_leaderboard(results2)
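Note: show_leaderboard assumes each leaderboard CSV is indexed by team and mixes per-source columns named <pred>_<source> (for example generated_g_02) with the summary columns listed in cols. A minimal sketch, using made-up teams and numbers, of a frame it would render if appended to app.py:

import pandas as pd

# Hypothetical two-team frame with the column layout app.py reads from task1.csv;
# every value below is invented for illustration only.
demo = pd.DataFrame(
    {
        "generated_g_02": [0.90, 0.75],
        "pristine_p_00": [0.80, 0.95],
        "generated_accuracy": [0.90, 0.75],
        "pristine_accuracy": [0.80, 0.95],
        "balanced_accuracy": [0.85, 0.85],
        "fail_rate": [0.0, 0.0],
        "total_time": [120.0, 340.0],
    },
    index=pd.Index(["team-a", "team-b"], name="team"),
)

# Same shape as results1/results2 above: the Summary and per-source tables render from this.
show_leaderboard({"public_score": demo})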
metric.py
ADDED
@@ -0,0 +1,125 @@
import pandas as pd
from huggingface_hub import hf_hub_download
import json


def _metric(solution_df, submission_df, mode="top_level", admin=False):
    """
    Score a submission against the solution.

    Parameters
    ----------
    solution_df : pandas.DataFrame
        The dataframe containing the solution data.
    submission_df : pandas.DataFrame
        The dataframe containing the submission data.
    mode : str, optional
        The mode of evaluation. "top_level" keeps only the summary accuracies;
        any other value (e.g. "detailed") also keeps the per-source accuracies.
        The default is "top_level".
    admin : bool, optional
        If True, group by the original source column ("source_og") instead of "source".

    Returns
    -------
    dict
        A dictionary with "public_score" and "private_score" entries.
    """

    solution_df["submission_pred"] = submission_df["pred"]

    if admin:
        source_col = "source_og"
    else:
        source_col = "source"

    cols = ["split", "pred", source_col]

    solution_df["correct"] = solution_df["pred"] == solution_df["submission_pred"]
    accuracy = solution_df.groupby(cols)["correct"].mean().to_frame("accuracy").reset_index()
    accuracy["score_name"] = accuracy["pred"] + "_" + accuracy[source_col]

    evaluation = {}

    split = "public"

    temp = accuracy.query(f"split=='{split}'")
    scores_by_source = temp.set_index("score_name")["accuracy"].sort_index()
    scores_by_source["generated_accuracy"] = temp.query("pred=='generated'")["accuracy"].mean()
    scores_by_source["pristine_accuracy"] = temp.query("pred=='pristine'")["accuracy"].mean()
    scores_by_source["balanced_accuracy"] = (scores_by_source["generated_accuracy"] + scores_by_source["pristine_accuracy"]) / 2.

    if mode == "top_level":
        scores_to_save = ["generated_accuracy", "pristine_accuracy", "balanced_accuracy"]
        evaluation[f"{split}_score"] = scores_by_source.loc[scores_to_save].to_dict()
    else:
        evaluation[f"{split}_score"] = scores_by_source.to_dict()

    split = "private"
    # private has everything

    temp = accuracy
    scores_by_source = temp.set_index("score_name")["accuracy"].sort_index()
    scores_by_source["generated_accuracy"] = temp.query("pred=='generated'")["accuracy"].mean()
    scores_by_source["pristine_accuracy"] = temp.query("pred=='pristine'")["accuracy"].mean()
    scores_by_source["balanced_accuracy"] = (scores_by_source["generated_accuracy"] + scores_by_source["pristine_accuracy"]) / 2.

    if mode == "top_level":
        scores_to_save = ["generated_accuracy", "pristine_accuracy", "balanced_accuracy"]
        evaluation[f"{split}_score"] = scores_by_source.loc[scores_to_save].to_dict()
    else:
        evaluation[f"{split}_score"] = scores_by_source.to_dict()

    if "time" in submission_df.columns:
        solution_df["submission_time"] = submission_df["time"]

        split = "public"
        evaluation[f"{split}_score"]["total_time"] = float(solution_df.query(f"split=='{split}'")["submission_time"].sum())

        split = "private"
        evaluation[f"{split}_score"]["total_time"] = float(solution_df["submission_time"].sum())
    else:
        for split in ["public", "private"]:
            evaluation[f"{split}_score"]["total_time"] = -1

    if "score" in submission_df.columns:
        solution_df["submission_score"] = submission_df["score"]

        split = "public"
        evaluation[f"{split}_score"]["fail_rate"] = float(solution_df.query(f"split=='{split}'")["submission_score"].isna().mean())

        split = "private"
        evaluation[f"{split}_score"]["fail_rate"] = float(solution_df["submission_score"].isna().mean())
    else:
        for split in ["public", "private"]:
            evaluation[f"{split}_score"]["fail_rate"] = -1

    return evaluation


def compute(params):
    solution_file = hf_hub_download(
        repo_id=params.competition_id,
        filename="solution.csv",
        token=params.token,
        repo_type="dataset",
    )

    solution_df = pd.read_csv(solution_file).set_index(params.submission_id_col)

    submission_filename = f"submissions/{params.team_id}-{params.submission_id}.csv"
    submission_file = hf_hub_download(
        repo_id=params.competition_id,
        filename=submission_filename,
        token=params.token,
        repo_type="dataset",
    )

    submission_df = pd.read_csv(submission_file).set_index(params.submission_id_col)

    return _metric(solution_df, submission_df)
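Note: _metric aligns the two frames on their shared index, so solution.csv and each submission CSV are expected to be indexed by the same id column, with the solution carrying split, pred, and source (or source_og when admin=True), and the submission carrying pred plus optional time and score columns. A minimal sketch, with made-up ids and labels, calling the scorer directly:

import pandas as pd

import metric

# Four invented clips: two public, two private (illustration only, not real challenge data).
solution_df = pd.DataFrame(
    {
        "split": ["public", "public", "private", "private"],
        "pred": ["generated", "pristine", "generated", "pristine"],
        "source": ["g_02", "p_00", "g_04", "p_01"],
    },
    index=pd.Index(["clip_0", "clip_1", "clip_2", "clip_3"], name="id"),
)

# A submission predicts a label per clip and may also report per-clip time and a raw score.
submission_df = pd.DataFrame(
    {
        "pred": ["generated", "pristine", "pristine", "pristine"],
        "time": [0.4, 0.3, 0.5, 0.2],
        "score": [0.98, 0.05, 0.40, 0.10],
    },
    index=solution_df.index,
)

evaluation = metric._metric(solution_df, submission_df, mode="detailed")
print(evaluation["public_score"])   # per-source and summary accuracies, plus total_time and fail_rate
print(evaluation["private_score"])  # same keys, computed over all rows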
process_data.py
ADDED
@@ -0,0 +1,140 @@
import json
from pathlib import Path

import pandas as pd

import metric

_metric = metric._metric


def get_submission(f):
    submission_info = json.load(open(f))
    submissions = pd.DataFrame(submission_info["submissions"])
    submissions["team_id"] = submission_info["id"]

    return submissions


def get_submissions_file(f):
    submission_df = pd.read_csv(f).set_index("id")
    if isinstance(submission_df.iloc[0]["score"], str):
        submission_df.loc[:, "score"] = submission_df.loc[:, "score"].apply(lambda a: json.loads(a)[0])
    return submission_df


def load_results(local_dir):
    team_file_name = "teams.json"
    team_info = pd.read_json(Path(local_dir) / team_file_name).T
    team_info.loc["baselines", "name"] = "baselines"
    submission_info_dir = "submission_info"
    submission_info_files = list((Path(local_dir) / submission_info_dir).glob("*.json"))
    submissions = pd.concat(
        [get_submission(f) for f in submission_info_files], ignore_index=True
    )
    submissions.loc[:, "team"] = team_info.loc[
        submissions["team_id"].values, "name"
    ].values

    submissions["submission_files"] = submissions.apply(
        lambda a: (
            str(
                Path(local_dir)
                / "submissions"
                / (a["team_id"] + "-" + a["submission_id"] + ".csv")
            )
            if a["team_id"] != "baselines"
            else str(
                Path("baselines") / (a["team_id"] + "-" + a["submission_id"] + ".csv")
            )
        ),
        axis=1,
    )
    submissions = submissions.drop(columns=["public_score", "private_score"])
    submissions["submission"] = (
        submissions["team"] + " - " + submissions["submission_repo"]
    )
    return submissions


def compute_metrics(submissions, local_dir, admin=True):

    submissions = submissions.query("status==3.0")

    if not admin:
        selected_by_team = submissions.groupby("team")["selected"].sum()
        teams_no_selected = selected_by_team.index[selected_by_team == 0]
        submissions.loc[submissions.team.isin(teams_no_selected), "selected"] = True
        submissions = submissions.query("selected")

    solution_df = pd.read_csv(Path(local_dir) / "solution.csv").set_index("id")

    results = {"private_score": [], "public_score": []}

    fields = ["team_id", "team", "submission_id", "submission_repo"]
    for i, row in submissions.T.items():
        r = get_submissions_file(row["submission_files"])
        eval = _metric(solution_df, r, mode="detailed", admin=admin)
        for m in ["private_score", "public_score"]:
            for f in fields:
                eval[m][f] = row[f]
            eval[m]["submission"] = f"{row.team} - {row.submission_repo}"

            eval[m] = pd.Series(eval[m]).to_frame().T
            results[m].append(eval[m])

    for m in ["private_score", "public_score"]:
        temp = pd.concat(results[m], ignore_index=True).T
        temp.index.name = "metric"
        temp = temp.reset_index()

        results[m] = (
            temp.set_index("metric")
            .T.sort_values("balanced_accuracy", ascending=False)
            .drop_duplicates(subset=["team", "submission_repo"])
        )

        if not admin:
            # only show each team's top selected submission
            results[m] = (
                results[m]
                .sort_values(["team", "balanced_accuracy"], ascending=False)
                .drop_duplicates(subset=["team"])
                .sort_values("balanced_accuracy", ascending=False)
            )

        results[m] = results[m].set_index("submission" if admin else "team")

    fields_to_merge = ["generated_accuracy", "pristine_accuracy", "balanced_accuracy", "total_time", "fail_rate"]

    submissions = pd.concat(
        [
            submissions.set_index("submission_id"),
            results["private_score"].reset_index().set_index("submission_id").loc[:, fields_to_merge],
        ],
        axis=1,
    ).reset_index()

    return results, submissions


def process_data(path, save_path):
    submissions = load_results(path)
    results, submissions = compute_metrics(submissions, path, admin=False)
    cols_to_drop = ["team_id", "submission_id", "submission_repo", "submission"]
    results["public_score"].drop(columns=cols_to_drop).to_csv(save_path)


if __name__ == "__main__":
    process_data("comp_data_task1", "task1.csv")
    process_data("comp_data_task2", "task2.csv")
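Note: process_data.py assumes each downloaded competition directory has the layout read by load_results and compute_metrics: teams.json, a submission_info/ folder with one JSON per team, a submissions/ folder of <team_id>-<submission_id>.csv files, and solution.csv. An illustrative helper (not part of the repo) that checks those paths before regenerating the leaderboard CSVs:

from pathlib import Path

# Paths taken from process_data.py; the check itself is only a convenience sketch.
def check_layout(local_dir: str) -> None:
    root = Path(local_dir)
    expected = [
        root / "teams.json",        # team_id -> name mapping, read with pd.read_json(...).T
        root / "submission_info",   # per-team JSON files, each with a "submissions" list
        root / "submissions",       # <team_id>-<submission_id>.csv prediction files
        root / "solution.csv",      # ground truth with id, split, pred, and source columns
    ]
    for p in expected:
        print(("ok      " if p.exists() else "MISSING ") + str(p))


check_layout("comp_data_task1")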
requirements.txt
ADDED
@@ -0,0 +1,2 @@
scikit-learn
numpy
run.sh
ADDED
@@ -0,0 +1 @@
streamlit run app.py
task1.csv
ADDED
@@ -0,0 +1,10 @@
team,generated_g_02,generated_g_04,generated_g_05,generated_g_06,generated_g_09,generated_g_10,generated_g_11,pristine_p_00,pristine_p_01,pristine_p_02,pristine_p_05,pristine_p_09,pristine_p_10,pristine_p_11,pristine_p_16,pristine_p_18,pristine_p_20,generated_accuracy,pristine_accuracy,balanced_accuracy,total_time,fail_rate
baseline-2,0.82,0.985,0.715,1.0,0.875,1.0,0.68,0.9,0.66,0.895,0.915,0.935,0.68,0.815,0.85,0.88,0.945,0.8678571428571428,0.8474999999999999,0.8576785714285713,1095.5729746818542,0.0
ISPL,0.965,0.985,0.445,1.0,1.0,1.0,0.29,1.0,0.08,0.985,0.98,0.97,0.835,0.925,0.965,0.86,0.995,0.812142857142857,0.8595,0.8358214285714285,86.06722044944746,0.0
baseline-1,0.825,0.995,0.465,0.97,1.0,1.0,0.925,0.96,0.37,0.955,0.815,1.0,0.69,0.285,0.635,0.885,0.955,0.8828571428571428,0.755,0.8189285714285715,2053.3383333683014,0.002058823529411765
DMF,0.925,0.74,0.76,0.965,0.835,1.0,1.0,0.37,0.0,0.11,0.105,0.81,0.175,0.99,0.53,0.43,0.45,0.8892857142857142,0.397,0.6431428571428571,73.15069174766523,0.0
Anon_Peking,0.985,0.92,0.91,0.98,0.89,0.97,0.95,0.28,0.105,0.125,0.23,0.2,0.355,0.09,0.15,0.165,0.125,0.9435714285714285,0.1825,0.5630357142857143,366.5311744213103,0.0
gylin,0.115,0.36,0.03,0.035,0.535,0.0,0.005,0.98,0.985,0.96,0.9,0.99,0.8,1.0,0.82,0.93,0.945,0.1542857142857143,0.931,0.5426428571428572,21.466523647308183,0.0
UCASRikki,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.5,116.53438615798935,0.0
safe-test,0.435,0.5,0.475,0.55,0.495,0.53,0.52,0.51,0.485,0.48,0.58,0.435,0.53,0.43,0.545,0.48,0.445,0.5007142857142858,0.492,0.4963571428571429,142.9702701568602,0.0
JAIST-HIS,0.895,0.94,0.985,0.525,0.89,0.98,0.455,0.01,0.035,0.025,0.03,0.13,0.065,0.0,0.04,0.035,0.07,0.8099999999999999,0.044,0.427,61.43132781982406,0.0
task2.csv
ADDED
@@ -0,0 +1,5 @@
team,generated_g_02,generated_g_04,generated_g_05,generated_g_06,generated_g_09,generated_g_10,generated_g_11,pristine_p_00,pristine_p_01,pristine_p_02,pristine_p_05,pristine_p_09,pristine_p_10,pristine_p_11,pristine_p_16,pristine_p_18,pristine_p_20,generated_accuracy,pristine_accuracy,balanced_accuracy,total_time,fail_rate
baseline-2,0.8789473684210526,0.9394736842105263,0.6868421052631579,0.9394736842105263,0.9078947368421053,0.9263157894736842,0.8236842105263158,0.9,0.66,0.895,0.915,0.935,0.68,0.815,0.85,0.88,0.945,0.8718045112781956,0.8474999999999999,0.8596522556390978,1558.1854865550995,0.0
baseline-1,0.9210526315789473,0.9868421052631579,0.7684210526315789,0.9289473684210526,0.9763157894736842,0.9815789473684211,0.9157894736842105,0.96,0.36,0.955,0.815,1.0,0.69,0.29,0.635,0.885,0.955,0.925563909774436,0.7545,0.8400319548872179,2615.167044878006,0.008583690987124463
ISPL,0.9105263157894737,0.9131578947368421,0.7,0.9710526315789474,0.8710526315789474,0.95,0.5921052631578947,0.985,0.015,0.925,0.8,0.74,0.83,0.805,0.935,0.8,0.975,0.8439849624060151,0.7809999999999999,0.8124924812030074,4660.0,0.0
safe-test,0.49736842105263157,0.4710526315789474,0.4789473684210526,0.4842105263157895,0.5236842105263158,0.5026315789473684,0.49473684210526314,0.5,0.455,0.495,0.495,0.505,0.515,0.505,0.535,0.51,0.51,0.4932330827067669,0.5025,0.49786654135338343,200.45346903800942,0.0
update_data.sh
ADDED
@@ -0,0 +1,3 @@
huggingface-cli download safe-challenge/SAFEChallengeTask1 --local-dir ./comp_data_task1 --repo-type dataset
huggingface-cli download safe-challenge/SAFEChallengeTask2 --local-dir ./comp_data_task2 --repo-type dataset
python process_data.py
|