Commit 56b1556
Parent(s): c7e15a9
gmb/initial-upload (#1)

Files changed:
- .gitignore +6 -0
- .streamlit/config.toml +2 -0
- README.md +8 -5
- app.py +447 -0
- index.html +0 -19
- metric.py +207 -0
- pyproject.toml +15 -0
- requirements.txt +4 -0
- style.css +0 -28
- test.sh +1 -0
- updated.txt +1 -0
- utils.py +303 -0
.gitignore
ADDED
@@ -0,0 +1,6 @@
temp*
__pycache__
.ipynb_checkpoints/
competition_cache/
.env
.vscode/launch.json
.streamlit/config.toml
ADDED
@@ -0,0 +1,2 @@
[browser]
gatherUsageStats = false
README.md
CHANGED
@@ -1,10 +1,13 @@
 ---
 title: Video Challenge Leaderboard
-emoji:
-colorFrom:
-colorTo:
-sdk:
+emoji: 🏢
+colorFrom: yellow
+colorTo: blue
+sdk: streamlit
+sdk_version: 1.43.2
+app_file: app.py
 pinned: false
+short_description: Leaderboard
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,447 @@
import streamlit as st
from pathlib import Path
import pandas as pd
import altair as alt
import subprocess
import os

## Save results path
results_path = Path("competition_cache/cached_results")
TASKS = ["video-challenge-pilot-config", "video-challenge-task-1-config"]
valid_splits = ["public", "private"]

## Check for files initially
if not os.path.exists(results_path):
    process = subprocess.Popen(
        ["python3", "utils.py"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,  # Decode stdout/stderr as text
    )
    process.wait()
    process.kill()


#####################################################################
##                         Data loading                            ##
#####################################################################
@st.cache_data
def load_results(task, best_only):
    if best_only:
        return {
            f"{s}_score": pd.read_csv(f"{results_path}/{task}_{s}_score.csv")
            .sort_values(["team", "balanced_accuracy"], ascending=False)
            .drop_duplicates(subset=["team"])
            .sort_values("balanced_accuracy", ascending=False)
            .set_index("team")
            for s in valid_splits
        }
    else:
        return {
            f"{s}_score": pd.read_csv(f"{results_path}/{task}_{s}_score.csv").set_index("team") for s in valid_splits
        }


@st.cache_data
def load_submission():
    out = []
    for task in TASKS:
        data = pd.read_csv(f"{results_path}/{task}_submissions.csv")
        data["task"] = task
        out.append(data)

    return pd.concat(out, ignore_index=True)


@st.cache_data
def get_updated_time(file="updated.txt"):
    return open(file).read()


@st.cache_data
def get_volume():
    subs = pd.concat(
        [pd.read_csv(f"{results_path}/{task}_submissions.csv") for task in TASKS],
        ignore_index=True,
    )
    subs["datetime"] = pd.DatetimeIndex(subs["datetime"])
    subs["date"] = subs["datetime"].dt.date
    subs = subs.groupby(["date", "status_reason"]).size().unstack().fillna(0).reset_index()

    return subs


@st.cache_data
def make_heatmap(results, label="generated", symbol="🤖"):

    # Assuming df is your wide-format DataFrame (models as rows, datasets as columns)
    df_long = results.set_index("team")

    team_order = results.index.tolist()
    df_long = df_long.loc[:, [c for c in df_long.columns if c.startswith(label) and "accuracy" not in c]]

    df_long.columns = [c.replace(f"{label}_", "") for c in df_long.columns]

    if "none" in df_long.columns:
        df_long = df_long.drop(columns=["none"])

    df_long = df_long.reset_index().melt(id_vars="team", var_name="source", value_name="acc")
    # df_long.rename(columns={'index': 'source'}, inplace=True)

    # Base chart for rectangles
    base = alt.Chart(df_long).encode(
        x=alt.X("source:O", title="Source", axis=alt.Axis(orient="top", labelAngle=-60)),
        y=alt.Y("team:O", title="Team", sort=team_order),
    )

    # Heatmap rectangles
    heatmap = base.mark_rect().encode(
        color=alt.Color("acc:Q", scale=alt.Scale(scheme="greens"), title=f"{label} Accuracy")
    )

    # Text labels
    text = base.mark_text(baseline="middle", fontSize=16).encode(
        text=alt.Text("acc:Q", format=".2f"),
        color=alt.condition(
            alt.datum.acc < 0.5,  # you can tune this for readability
            alt.value("black"),
            alt.value("white"),
        ),
    )

    # Combine heatmap and text
    chart = (heatmap + text).properties(width=600, height=500, title=f"Accuracy on {symbol} {label} sources heatmap")

    return chart


@st.cache_data
def make_roc_curves(task, best_only=False):

    rocs = pd.read_csv(f"{results_path}/{task}_rocs.csv")

    # if best_only:
    #     rocs = rocs.sort_values(by=["auc"], ascending=False).drop_duplicates("team")

    roc_chart = (
        alt.Chart(rocs)
        .mark_line()
        .encode(
            x="fpr",
            y="tpr",
            color="team:N",
            detail="submission_id:N",
        )
    )

    return roc_chart


#####################################################################
##                        Page definition                          ##
#####################################################################

## Set title
st.set_page_config(
    page_title="Leaderboard",
    initial_sidebar_state="collapsed",
    layout="wide",  # This makes the app use the full width of the screen
)

## Pull new results or toggle private/public if you are an owner
with st.sidebar:

    hf_token = os.getenv("HF_TOKEN")
    password = st.text_input("Admin login:", type="password")

    if password == hf_token:
        if st.button("Pull New Results"):
            with st.spinner("Pulling new results", show_time=True):
                try:
                    process = subprocess.Popen(
                        ["python3", "utils.py"],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        text=True,  # Decode stdout/stderr as text
                    )
                    st.success(f"Background task started with PID: {process.pid}")
                    process.wait()
                    process.kill()
                    st.success(f"PID {process.pid} finished!")
                    # If a user has the right perms, then this clears the cache
                    load_results.clear()
                    get_volume.clear()
                    load_submission.clear()
                    st.rerun()
                except Exception as e:
                    st.error(f"Error starting background task: {e}")

        ## Initialize the toggle state in session_state if it doesn't exist
        if "private_view" not in st.session_state:
            st.session_state.private_view = False

        # Create the toggle widget
        # The 'value' parameter sets the initial state, here linked to session_state
        # The 'key' parameter is crucial for identifying the widget across reruns and linking to session_state
        toggle_value = st.toggle("Private Scores", value=st.session_state.private_view, key="private_view")

        # The 'toggle_value' variable will hold the current state of the toggle (True or False)
        if toggle_value:
            st.write("Showing **PRIVATE** scores.")
        else:
            st.write("Showing **PUBLIC** scores.")

        split = "public" if not toggle_value else "private"
    else:
        split = "public"


def show_leaderboard(results, task):
    cols = [
        "generated_accuracy",
        "real_accuracy",
        # "pristine_accuracy",
        "balanced_accuracy",
        "auc",
        "fail_rate",
        "total_time",
    ]

    # st.dataframe(results[f"{split}_score"])

    column_config = {
        "balanced_accuracy": st.column_config.NumberColumn(
            "⚖️ Balanced Accuracy",
            format="compact",
            min_value=0,
            pinned=True,
            max_value=1.0,
            # width="small",
        ),
        "generated_accuracy": st.column_config.NumberColumn(
            "🤖 True Positive Rate",
            format="compact",
            min_value=0,
            pinned=True,
            max_value=1.0,
            # width="small",
        ),
        "real_accuracy": st.column_config.NumberColumn(
            "🧑‍🎤 True Negative Rate",
            format="compact",
            min_value=0,
            pinned=True,
            max_value=1.0,
            # width="small",
        ),
        "auc": st.column_config.NumberColumn(
            "📐 AUC",
            format="compact",
            min_value=0,
            pinned=True,
            max_value=1.0,
            # width="small",
        ),
        "fail_rate": st.column_config.NumberColumn(
            "❌ Fail Rate",
            format="compact",
            # width="small",
        ),
        "total_time": st.column_config.NumberColumn(
            "🕒 Inference Time",
            format="compact",
            # width="small",
        ),
    }

    labels = {"real": "🧑‍🎤", "generated": "🤖"}

    for c in results[f"{split}_score"].columns:
        if "accuracy" in c:
            continue
        if any(p in c for p in ["generated", "real"]):
            s = c.split("_")
            pred = s[0]
            source = " ".join(s[1:])
            column_config[c] = st.column_config.NumberColumn(
                labels[pred] + " " + source,
                help=c,
                format="compact",
                min_value=0,
                max_value=1.0,
            )

    "#### Summary"
    st.dataframe(results[f"{split}_score"].loc[:, cols], column_config=column_config)

    cond_bacc = st.toggle("Conditional Balanced Accuracy", value=False, key=f"cond_bacc_{task}")

    cols = [c for c in results[f"{split}_score"].columns if "generated_" in c and "accuracy" not in c]
    temp = results[f"{split}_score"].loc[:, cols].copy()

    if cond_bacc:
        tnr = results[f"{split}_score"].loc[:, ["real_accuracy"]]
        temp[:] = (temp.values + tnr.values) / 2.0
        "#### 🤖 Balanced Accuracy | Generated Source"
    else:
        "#### 🤖 True Positive Rate | Generated Source"

    st.dataframe(temp, column_config=column_config)

    cols = [c for c in results[f"{split}_score"].columns if "real_" in c and "accuracy" not in c]
    temp = results[f"{split}_score"].loc[:, cols].copy()

    if cond_bacc:
        tpr = results[f"{split}_score"].loc[:, ["generated_accuracy"]]
        temp[:] = (temp.values + tpr.values) / 2.0
        "#### 🧑‍🎤 Balanced Accuracy | Real Source"
    else:
        "#### 🧑‍🎤 True Negative Rate | Real Source"

    st.dataframe(temp, column_config=column_config)


def make_roc(results):
    results["FA"] = 1.0 - results["real_accuracy"]

    chart = (
        alt.Chart(results)
        .mark_circle()
        .encode(
            x=alt.X("FA:Q", title="🧑‍🎤 False Positive Rate", scale=alt.Scale(domain=[0.0, 1.0])),
            y=alt.Y("generated_accuracy:Q", title="🤖 True Positive Rate", scale=alt.Scale(domain=[0.0, 1.0])),
            color="team:N",  # Color by categorical field
            size=alt.Size(
                "total_time:Q", title="🕒 Inference Time", scale=alt.Scale(rangeMin=100)
            ),  # Size by quantitative field
        )
        .properties(width=400, height=400, title="Detection vs False Alarm vs Inference Time")
    )

    diag_line = (
        alt.Chart(pd.DataFrame(dict(tpr=[0, 1], fpr=[0, 1])))
        .mark_line(color="lightgray", strokeDash=[8, 4])
        .encode(x="fpr", y="tpr")
    )

    return chart + diag_line


def make_acc(results):
    # results["FA"] = 1.0 - results["pristine_accuracy"]
    # results = results[results["total_time"] >= 0]
    # results["total_time"] = results["total_time"]

    results = results.loc[results["total_time"] >= 0]

    chart = (
        alt.Chart(results)
        .mark_circle(size=200)
        .encode(
            x=alt.X("total_time:Q", title="🕒 Inference Time", scale=alt.Scale(domain=[0.0, 10000])),
            y=alt.Y(
                "balanced_accuracy:Q",
                title="Balanced Accuracy",
                scale=alt.Scale(domain=[0.4, 1]),
            ),
            color="team:N",  # Color by categorical field
        )
        .properties(width=400, height=400, title="Inference Time vs Balanced Accuracy")
    )
    diag_line = (
        alt.Chart(pd.DataFrame(dict(t=[0, results["total_time"].max()], y=[0.5, 0.5])))
        .mark_line(color="lightgray", strokeDash=[8, 4])
        .encode(x="t", y="y")
    )
    return chart + diag_line


def get_heatmaps(temp):
    h1 = make_heatmap(temp, "generated", symbol="🤖")
    h2 = make_heatmap(temp, "real", symbol="🧑‍🎤")

    st.altair_chart(h1, use_container_width=True)
    st.altair_chart(h2, use_container_width=True)

    if temp.columns.str.contains("aug", case=False).any():
        h3 = make_heatmap(temp, "aug", symbol="🛠️")
        st.altair_chart(h3, use_container_width=True)


def make_plots_for_task(task, split, best_only):
    results = load_results(task, best_only=best_only)
    temp = results[f"{split}_score"].reset_index()

    # st.write(temp)

    t1, t2 = st.tabs(["Tables", "Charts"])
    with t1:
        show_leaderboard(results, task)

    with t2:
        roc_scatter = make_roc(temp)
        acc_vs_time = make_acc(temp)

        if split == "private" and hf_token is not None:
            full_curves = st.toggle("full curves", value=True, key=f"all curves {task}")

            if full_curves:
                roc_scatter = make_roc_curves(task, best_only) + roc_scatter

            st.altair_chart(roc_scatter | acc_vs_time, use_container_width=False)
        else:
            st.altair_chart(roc_scatter | acc_vs_time, use_container_width=False)

    # with t3:
    #     get_heatmaps(temp)


updated = get_updated_time()
st.markdown(updated)
# st.markdown("#### Detailed Public Leaderboard")
# st.markdown("[SAFE: Synthetic Audio Forensics Evaluation Challenge](https://stresearch.github.io/SAFE/)")
best_only = True  # st.toggle("Only Best per Team", value=True)
# show_chart = st.toggle("Show Table", value=True)


tp, t1, volume_tab, all_submission_tab = st.tabs(["**Pilot Task**", "**Task 1**", "**Submission Volume**", "**All Submissions**"])
with tp:
    "*Detection of Synthetic Video Content. Video files are unmodified from the original output from the models or the real sources.*"
    make_plots_for_task(TASKS[0], split, best_only)
with t1:
    "*Detection of Synthetic Video Content. Video files are unmodified from the original output from the models or the real sources.*"
    make_plots_for_task(TASKS[1], split, best_only)

with volume_tab:
    subs = get_volume()
    status_lookup = "QUEUED,PROCESSING,SUCCESS,FAILED".split(",")
    found_columns = subs.columns.values.tolist()
    status_lookup = list(set(status_lookup) & set(found_columns))
    st.bar_chart(subs, x="date", y=status_lookup, stack=True)

    total_submissions = int(subs.loc[:, status_lookup].fillna(0).values.sum())
    st.metric("Total Submissions", value=total_submissions)

    st.metric("Duration", f'{(subs["date"].max() - subs["date"].min()).days} days')

if split == "private":
    with all_submission_tab:
        data = load_submission()
        st.dataframe(data)
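Note: a minimal sketch, not part of this commit. The Popen / wait / kill pattern used in app.py to refresh results runs utils.py to completion before the app continues, so a blocking subprocess.run call behaves the same way; it assumes utils.py sits in the working directory, as it does in this repo.

import subprocess

# Run the refresh script, wait for it to finish, and capture output as text.
result = subprocess.run(
    ["python3", "utils.py"],
    capture_output=True,
    text=True,
)
print(result.returncode)
print(result.stdout)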
index.html
DELETED
@@ -1,19 +0,0 @@
<!doctype html>
<html>
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width" />
    <title>My static Space</title>
    <link rel="stylesheet" href="style.css" />
  </head>
  <body>
    <div class="card">
      <h1>Welcome to your static Space!</h1>
      <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
      <p>
        Also don't forget to check the
        <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
      </p>
    </div>
  </body>
</html>
metric.py
ADDED
@@ -0,0 +1,207 @@
import json
import re
import pandas as pd
from huggingface_hub import hf_hub_download
from sklearn.metrics import roc_auc_score, roc_curve
import numpy as np


def compute_roc(solution_df):

    ## fix weird submissions
    if isinstance(solution_df.iloc[0]["score"], str):
        solution_df.loc[:, "score"] = solution_df.loc[:, "score"].apply(
            lambda a: float(
                np.array(json.loads(re.sub(r"\b(\d+)\.(?!\d)", r"\1.0", a))).squeeze()
                if isinstance(a, str)
                else float("nan")
            )
        )

    isna = solution_df["score"].isna()

    if isna.all():
        ## if all nans
        return -1

    solution_df = solution_df.loc[~isna]
    auc = roc_auc_score(solution_df["pred"] == "generated", solution_df["score"])
    return auc


def compute_roc_curve(solution_df, keep_every: int = 10):

    ## fix weird submissions
    if isinstance(solution_df.iloc[0]["score"], str):
        solution_df.loc[:, "score"] = solution_df.loc[:, "score"].apply(
            lambda a: float(
                np.array(json.loads(re.sub(r"\b(\d+)\.(?!\d)", r"\1.0", a))).squeeze()
                if isinstance(a, str)
                else float("nan")
            )
        )

    isna = solution_df["score"].isna()

    if isna.all():
        ## if all nans
        return {"fpr": [], "tpr": [], "threshold": []}

    solution_df = solution_df.loc[~isna]
    fpr, tpr, threshold = roc_curve(solution_df["pred"] == "generated", solution_df["score"])
    if len(fpr) < keep_every:
        return {"fpr": fpr.tolist(), "tpr": tpr.tolist(), "threshold": threshold.tolist()}

    # Sample every keep_every
    return {
        "fpr": fpr.tolist()[::keep_every],
        "tpr": tpr.tolist()[::keep_every],
        "threshold": threshold.tolist()[::keep_every],
    }


def _metric(solution_df, submission_df, mode="top_level", full: bool = False):
    """
    This function calculates the accuracy of the generated predictions.

    Parameters
    ----------
    solution_df : pandas.DataFrame
        The dataframe containing the solution data.
    submission_df : pandas.DataFrame
        The dataframe containing the submission data.
    mode : str, optional
        The mode of evaluation. Can be "top_level" or "bottom_level". The default is "top_level".
    full : bool, optional
        Full evaluation mode breaks up scores by source (both anonymized and original).

    Returns
    -------
    dict
        Evaluation scores keyed by "public_score" and "private_score".
    """

    ## Allocate space
    evaluation = {}

    ## Ensure alignment of keys and group relevant columns
    solution_df["submission_pred"] = solution_df.join(submission_df, lsuffix="_solution", rsuffix="_submission")[
        "pred_submission"
    ].values
    cols = ["split", "pred", "source", "source_og"]
    solution_df["correct"] = solution_df["pred"] == solution_df["submission_pred"]
    accuracy = solution_df.groupby(cols)["correct"].mean().to_frame("accuracy").reset_index()
    accuracy["score_name"] = accuracy["pred"] + "_" + accuracy["source"]

    ## Create public dataframe and private dataframe
    public_df = accuracy.query("split=='public'").copy()
    private_df = accuracy.copy()
    private_df["score_name"] = private_df["pred"] + "_" + private_df["source_og"]

    ## Perform a loop over categories for reported metrics
    for split, temp in zip(["public", "private"], [public_df, private_df]):
        scores_by_source = temp.set_index("score_name")["accuracy"].sort_index()
        scores_by_source["generated_accuracy"] = temp.query("pred=='generated'")["accuracy"].mean()
        scores_by_source["real_accuracy"] = temp.query("pred=='real'")["accuracy"].mean()
        scores_by_source["balanced_accuracy"] = (
            scores_by_source["generated_accuracy"] + scores_by_source["real_accuracy"]
        ) / 2.0
        if mode == "top_level":
            scores_to_save = ["generated_accuracy", "real_accuracy", "balanced_accuracy"]
            evaluation[f"{split}_score"] = scores_by_source.loc[scores_to_save].to_dict()
        else:
            evaluation[f"{split}_score"] = scores_by_source.to_dict()

        ## Compute by source - anonymized and original
        # if full:
        #     evaluation[f"{split}_score"]["anon_source"] = temp.groupby("source")["accuracy"].mean().to_dict()
        #     evaluation[f"{split}_score"]["original_source"] = temp.groupby("source_og")["accuracy"].mean().to_dict()

    ## Save data split
    evaluation["public_score"]["proportion"] = len(solution_df.query("split=='public'").copy()) / len(solution_df)
    evaluation["private_score"]["proportion"] = 1.0

    ## Compute AUC
    if "score" in submission_df.columns:
        solution_df["score"] = submission_df["score"]

        ## Public
        split = "public"
        temp = solution_df.query(f"split=='{split}'").copy()
        try:
            auc = compute_roc(temp)
        except Exception as e:
            print("failed auc")
            print(e)
            auc = "nan"
        evaluation[f"{split}_score"]["auc"] = float(auc)
        evaluation[f"{split}_score"]["fail_rate"] = float(temp["score"].isna().mean())

        ## Private
        split = "private"
        temp = solution_df.copy()
        try:
            auc = compute_roc(temp)
        except Exception as e:
            print("failed auc")
            print(e)
            auc = "nan"
        evaluation[f"{split}_score"]["auc"] = float(auc)
        evaluation[f"{split}_score"]["fail_rate"] = float(temp["score"].isna().mean())

    ## Full data computations
    if not full:
        return evaluation

    ## Roc
    if "score" in submission_df.columns:
        solution_df["score"] = submission_df["score"]

        ## Public
        split = "public"
        temp = solution_df.query(f"split=='{split}'").copy()
        try:
            roc_curve = compute_roc_curve(temp)
        except Exception as e:
            print("failed roc")
            print(e)
            roc_curve = {"fpr": [], "tpr": [], "threshold": []}
        evaluation[f"{split}_score"]["roc"] = roc_curve

        ## Private
        split = "private"
        temp = solution_df.copy()
        try:
            roc_curve = compute_roc_curve(temp)
        except Exception as e:
            print("failed roc")
            print(e)
            roc_curve = {"fpr": [], "tpr": [], "threshold": []}
        evaluation[f"{split}_score"]["roc"] = roc_curve

    return evaluation


def compute(params):
    solution_file = hf_hub_download(
        repo_id=params.competition_id,
        filename="solution.csv",
        token=params.token,
        repo_type="dataset",
    )

    solution_df = pd.read_csv(solution_file).set_index(params.submission_id_col)

    submission_filename = f"submissions/{params.team_id}-{params.submission_id}.csv"
    submission_file = hf_hub_download(
        repo_id=params.competition_id,
        filename=submission_filename,
        token=params.token,
        repo_type="dataset",
    )

    submission_df = pd.read_csv(submission_file).set_index(params.submission_id_col)

    return _metric(solution_df, submission_df)
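Note: a minimal sketch, not part of this commit, showing how _metric can be exercised on toy data. The column names mirror what the function reads (a shared "id" index plus pred / split / source / source_og in the solution frame, and pred / score in the submission frame); the row values themselves are made up for illustration.

import pandas as pd
from metric import _metric

# Hypothetical solution frame: ground truth per clip id.
solution = pd.DataFrame(
    {
        "id": ["a", "b", "c", "d"],
        "pred": ["generated", "real", "generated", "real"],
        "split": ["public", "public", "private", "private"],
        "source": ["model_x", "cam_1", "model_x", "cam_1"],
        "source_og": ["model_x_og", "cam_1_og", "model_x_og", "cam_1_og"],
    }
).set_index("id")

# Hypothetical submission frame: predicted label and a detection score per clip id.
submission = pd.DataFrame(
    {
        "id": ["a", "b", "c", "d"],
        "pred": ["generated", "real", "real", "real"],
        "score": [0.9, 0.2, 0.4, 0.1],
    }
).set_index("id")

# Returns a dict with "public_score" and "private_score" entries containing
# generated/real/balanced accuracy, proportion, auc, and fail_rate.
print(_metric(solution, submission)["public_score"])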
pyproject.toml
ADDED
@@ -0,0 +1,15 @@
[project]
name = "leaderboard"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
    "streamlit",
    "pandas",
    "altair",
    "scikit-learn",
    "huggingface_hub",
    "vl-convert-python",
    "hf_transfer",
]
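Note: a minimal sketch, not part of this commit, that reads the dependency list declared above. It assumes Python 3.11+ so that tomllib is available from the standard library (the project itself requires >=3.12) and that it runs from the repo root.

import tomllib

# Parse pyproject.toml and print the declared runtime dependencies.
with open("pyproject.toml", "rb") as f:
    meta = tomllib.load(f)

print(meta["project"]["dependencies"])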
requirements.txt
ADDED
@@ -0,0 +1,4 @@
scikit-learn
numpy
streamlit
huggingface_hub
style.css
DELETED
@@ -1,28 +0,0 @@
body {
  padding: 2rem;
  font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
}

h1 {
  font-size: 16px;
  margin-top: 0;
}

p {
  color: rgb(107, 114, 128);
  font-size: 15px;
  margin-bottom: 10px;
  margin-top: 5px;
}

.card {
  max-width: 620px;
  margin: 0 auto;
  padding: 16px;
  border: 1px solid lightgray;
  border-radius: 16px;
}

.card p:last-child {
  margin-bottom: 0;
}
test.sh
ADDED
@@ -0,0 +1 @@
HF_TOKEN=test streamlit run app.py
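Note: a minimal sketch, not part of this commit, of why test.sh sets HF_TOKEN. In app.py the sidebar password is compared against os.getenv("HF_TOKEN"), so exporting a known value such as "test" unlocks the admin controls when running the app locally.

import os

os.environ["HF_TOKEN"] = "test"  # what test.sh does via the environment
password = "test"  # what an admin would type into the sidebar
print(password == os.getenv("HF_TOKEN"))  # True -> admin controls are shown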
updated.txt
ADDED
@@ -0,0 +1 @@
Updated on 2025-08-06 11:28:08 EST
utils.py
ADDED
@@ -0,0 +1,303 @@
import json
from datetime import datetime
from pathlib import Path
from huggingface_hub import snapshot_download
import tqdm.auto as tqdm
from typing import Any, Dict, List, Tuple
from collections import defaultdict
from metric import _metric
import os
import pandas as pd

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "20"
COMP_CACHE = os.environ.get("COMP_CACHE", "./competition_cache")


def download_competition_data(competition_names: List[str]) -> None:
    """Download copies to local environment"""
    for repo_id in tqdm.tqdm(competition_names):
        snapshot_download(
            repo_id=repo_id,
            local_dir=os.path.join(COMP_CACHE, repo_id),
            repo_type="dataset",
            token=os.environ.get("HF_TOKEN"),
        )


STATUS_MAP = {0: "PENDING", 1: "QUEUED", 2: "PROCESSING", 3: "SUCCESS", 4: "FAILED"}

## Make a directory to store computed results
os.makedirs(Path("competition_cache") / "cached_results", exist_ok=True)


def load_teams(competition_space_path: Path) -> pd.DataFrame:
    team_file_name = "teams.json"
    return pd.read_json(Path(competition_space_path) / team_file_name).T


def json_to_dataframe(data, extra_column_name=None, extra_column_value=None):
    flat_data = []
    for entry in data:
        original_flat_entry = {**entry}
        flat_entry = {k: v for k, v in original_flat_entry.items() if "score" not in k}
        times = {
            k.replace("score", "time"): v.get("total_time", -1) for k, v in original_flat_entry.items() if "score" in k
        }
        flat_entry.update(times)
        if extra_column_name:
            flat_entry[extra_column_name] = extra_column_value
        flat_data.append(flat_entry)
    df = pd.DataFrame(flat_data)
    return df


def load_submission_map(competition_space_path: Path) -> Tuple[Dict[str, str], pd.DataFrame]:
    submission_info_dir = "submission_info"
    submission_info_files = list((Path(competition_space_path) / submission_info_dir).glob("*.json"))

    # Loop and collect submission IDs by team
    team_submissions: Dict[str, str] = {}
    submission_summaries: List[pd.DataFrame] = []
    for file in submission_info_files:
        with open(file, "r") as fn:
            json_data = json.load(fn)
        submission_summaries.append(
            json_to_dataframe(
                data=json_data["submissions"], extra_column_name="team_id", extra_column_value=json_data["id"]
            )
        )
        submission_list = pd.read_json(file).submissions.values.tolist()
        for submission in submission_list:
            team_submissions[submission["submission_id"]] = submission["submitted_by"]
    submission_summary = pd.concat(submission_summaries, axis=0)
    submission_summary["status_reason"] = submission_summary["status"].apply(lambda x: STATUS_MAP[x])
    return team_submissions, submission_summary


def get_member_to_team_map(teams: pd.DataFrame, team_submissions: Dict[str, str]) -> Dict[str, str]:
    member_map: Dict[str, str] = {}
    for member_id in team_submissions.values():
        member_map[member_id] = teams[teams.members.apply(lambda x: member_id in x)].id.values[0]
    return member_map


def load_submissions(competition_space_path: Path) -> Dict[str, Dict[str, pd.DataFrame]]:
    submission_dir = "submissions"
    submissions: Dict[str, Dict[str, pd.DataFrame]] = defaultdict(dict)
    for file in list((Path(competition_space_path) / submission_dir).glob("*.csv")):
        file_name = str(file).split("/")[-1].split(".")[0]
        team_id = "-".join(file_name.split("/")[-1].split("-")[:5])
        sub_id = "-".join(file_name.split("/")[-1].split("-")[5:])
        submissions[team_id][sub_id] = pd.read_csv(file).set_index("id")
    return submissions


def compute_metric_per_team(solution_df: pd.DataFrame, team_submissions: Dict[str, pd.DataFrame]) -> Dict[str, Any]:
    results: Dict[str, Any] = {}
    for submission_id, submission in team_submissions.items():
        results[submission_id] = _metric(solution_df=solution_df, submission_df=submission, mode="detailed", full=True)
    return results


def prep_public(public_results: Dict[str, Any]) -> Dict[str, Any]:
    new: Dict[str, Any] = {}
    for key, value in public_results.items():
        # if key == "anon_source":
        #     for sub_key, sub_value in value.items():
        #         sub_key = ("generated" if sub_key[0] == "g" else "real") + "_" + sub_key.split("_")[-1]
        #         new[sub_key] = sub_value
        #     continue
        if key in ["proportion", "roc", "original_source"]:
            continue
        new[key] = value
    return new


def prep_private(private_results: Dict[str, Any]) -> Dict[str, Any]:
    new: Dict[str, Any] = {}
    for key, value in private_results.items():
        # if key == "original_source":
        #     for sub_key, sub_value in value.items():
        #         sub_key = ("real" if sub_key in REAL_MAP else "generated") + "_" + sub_key
        #         new[sub_key] = sub_value
        #     continue
        if key in ["proportion", "roc", "anon_source"]:
            continue
        new[key] = value
    return new


def extract_roc(results: Dict[str, Any]) -> Dict[str, Any]:
    new: Dict[str, Any] = {}
    for key, value in results.items():
        if key in ["roc"]:
            for sub_key, sub_value in value.items():
                new[sub_key] = sub_value
            continue
        if key in ["auc"]:
            new[key] = value
    return new


if __name__ == "__main__":

    ## Download data
    spaces: List[str] = ["safe-challenge/video-challenge-pilot-config", "safe-challenge/video-challenge-task-1-config"]
    download_competition_data(competition_names=spaces)

    ## Loop
    for space in spaces:
        local_dir = Path("competition_cache") / space

        ## Load relevant data
        teams = load_teams(competition_space_path=local_dir)
        team_submissions, submission_summaries = load_submission_map(competition_space_path=local_dir)
        member_map = get_member_to_team_map(teams=teams, team_submissions=team_submissions)
        submissions = load_submissions(competition_space_path=local_dir)

        ## Load solutions
        solutions_df = pd.read_csv(local_dir / "solution.csv").set_index("id")

        ## Loop and save by team
        public, private, rocs = [], [], []
        for team_id, submission_set in submissions.items():
            results = compute_metric_per_team(solution_df=solutions_df, team_submissions=submission_set)
            public_results = {
                key: prep_public(value["public_score"]) for key, value in results.items() if key in team_submissions
            }
            private_results = {
                key: prep_private(value["private_score"]) for key, value in results.items() if key in team_submissions
            }

            ## Add timing
            public_times = {
                x["submission_id"]: x["public_time"]
                for x in submission_summaries[submission_summaries["submission_id"].isin(results.keys())][
                    ["submission_id", "public_time"]
                ].to_dict(orient="records")
            }
            private_times = {
                x["submission_id"]: x["private_time"]
                for x in submission_summaries[submission_summaries["submission_id"].isin(results.keys())][
                    ["submission_id", "private_time"]
                ].to_dict(orient="records")
            }
            for key in public_results.keys():
                public_results[key]["total_time"] = public_times[key]
            for key in private_results.keys():
                private_results[key]["total_time"] = private_times[key]

            ## Roc computations
            roc_results = {
                key: extract_roc(value["private_score"]) for key, value in results.items() if key in team_submissions
            }
            roc_df = pd.json_normalize(roc_results.values())
            if len(roc_df) == 0:
                continue
            roc_df.insert(loc=0, column="submission_id", value=roc_results.keys())
            roc_df.insert(
                loc=0,
                column="team",
                value=[
                    teams[teams.id == member_map[team_submissions[submission_id]]].name.values[0]
                    for submission_id in roc_results.keys()
                ],
            )
            roc_df.insert(
                loc=0,
                column="submission_repo",
                value=[
                    submission_summaries[
                        submission_summaries.team_id == member_map[team_submissions[submission_id]]
                    ].submission_repo.values[0]
                    for submission_id in roc_results.keys()
                ],
            )
            roc_df["label"] = roc_df.apply(
                lambda x: f"AUC: {round(x['auc'], 2)} - {x['team']} - {x['submission_repo']}", axis=1
            )
            rocs.append(roc_df)

            ## Append results to save in cache
            public_df = pd.json_normalize(public_results.values())
            public_df.insert(
                loc=0,
                column="submission",
                value=[
                    teams[teams.id == member_map[team_submissions[submission_id]]].name.values[0]
                    for submission_id in public_results.keys()
                ],
            )
            public_df.insert(
                loc=0,
                column="team",
                value=[
                    teams[teams.id == member_map[team_submissions[submission_id]]].name.values[0]
                    for submission_id in public_results.keys()
                ],
            )
            public_df.insert(
                loc=0,
                column="team_id",
                value=[
                    teams[teams.id == member_map[team_submissions[submission_id]]].id.values[0]
                    for submission_id in public_results.keys()
                ],
            )
            public.append(public_df)

            ## Private results
            private_df = pd.json_normalize(private_results.values())
            private_df.insert(
                loc=0,
                column="submission",
                value=[
                    teams[teams.id == member_map[team_submissions[submission_id]]].name.values[0]
                    for submission_id in private_results.keys()
                ],
            )
            private_df.insert(
                loc=0,
                column="team",
                value=[
                    teams[teams.id == member_map[team_submissions[submission_id]]].name.values[0]
                    for submission_id in private_results.keys()
                ],
            )
            private_df.insert(
                loc=0,
                column="team_id",
                value=[
                    teams[teams.id == member_map[team_submissions[submission_id]]].id.values[0]
                    for submission_id in private_results.keys()
                ],
            )
            private.append(private_df)

        ## Save as csvs
        public = pd.concat(public, axis=0).sort_values(by="balanced_accuracy", ascending=False)
        private = pd.concat(private, axis=0).sort_values(by="balanced_accuracy", ascending=False)
        rocs = pd.concat(rocs, axis=0).explode(["tpr", "fpr", "threshold"], ignore_index=True)
        public.to_csv(
            Path("competition_cache") / "cached_results" / f"{str(local_dir).split('/')[-1]}_public_score.csv",
            index=False,
        )
        private.to_csv(
            Path("competition_cache") / "cached_results" / f"{str(local_dir).split('/')[-1]}_private_score.csv",
            index=False,
        )
        rocs.to_csv(
            Path("competition_cache") / "cached_results" / f"{str(local_dir).split('/')[-1]}_rocs.csv", index=False
        )
        submission_summaries.to_csv(
            Path("competition_cache") / "cached_results" / f"{str(local_dir).split('/')[-1]}_submissions.csv",
            index=False,
        )

    ## Update time
    now = datetime.now()
    formatted = now.strftime("Updated on %Y-%m-%d %H:%M:%S EST")
    with open("updated.txt", "w") as file:
        file.write(formatted)
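Note: a minimal sketch, not part of this commit, of the files utils.py writes under competition_cache/cached_results and that app.py later reads; the names combine each config repo name with a fixed suffix.

from pathlib import Path

cache = Path("competition_cache") / "cached_results"
# One CSV per task and per result type, matching the to_csv calls above.
for task in ["video-challenge-pilot-config", "video-challenge-task-1-config"]:
    for suffix in ["public_score", "private_score", "rocs", "submissions"]:
        print(cache / f"{task}_{suffix}.csv")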