gmancino-ball committed
Commit 56b1556 · 1 Parent(s): c7e15a9

gmb/initial-upload (#1)

Files changed (12)
  1. .gitignore +6 -0
  2. .streamlit/config.toml +2 -0
  3. README.md +8 -5
  4. app.py +447 -0
  5. index.html +0 -19
  6. metric.py +207 -0
  7. pyproject.toml +15 -0
  8. requirements.txt +4 -0
  9. style.css +0 -28
  10. test.sh +1 -0
  11. updated.txt +1 -0
  12. utils.py +303 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ temp*
2
+ __pycache__
3
+ .ipynb_checkpoints/
4
+ competition_cache/
5
+ .env
6
+ .vscode/launch.json
.streamlit/config.toml ADDED
@@ -0,0 +1,2 @@
1
+ [browser]
2
+ gatherUsageStats = false
README.md CHANGED
@@ -1,10 +1,13 @@
1
  ---
2
  title: Video Challenge Leaderboard
3
- emoji: 🐨
4
- colorFrom: green
5
- colorTo: gray
6
- sdk: static
 
 
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Video Challenge Leaderboard
3
+ emoji: 🏢
4
+ colorFrom: yellow
5
+ colorTo: blue
6
+ sdk: streamlit
7
+ sdk_version: 1.43.2
8
+ app_file: app.py
9
  pinned: false
10
+ short_description: Leaderboard
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,447 @@
1
+ import streamlit as st
2
+ from pathlib import Path
3
+ import pandas as pd
4
+ import altair as alt
5
+ import subprocess
6
+ import os
7
+
8
+ ## Path where cached results are stored
9
+ results_path = Path("competition_cache/cached_results")
10
+ TASKS = ["video-challenge-pilot-config", "video-challenge-task-1-config"]
11
+ valid_splits = ["public", "private"]
12
+
13
+ ## Check for files initially
14
+ if not os.path.exists(results_path):
15
+ process = subprocess.Popen(
16
+ ["python3", "utils.py"],
17
+ stdout=subprocess.PIPE,
18
+ stderr=subprocess.PIPE,
19
+ text=True, # Decode stdout/stderr as text
20
+ )
21
+ process.wait()
22
+ process.kill()
23
+
24
+
25
+ #####################################################################
26
+ ## Data loading ##
27
+ #####################################################################
28
+ ## Data loading
29
+ @st.cache_data
30
+ def load_results(task, best_only):
31
+ if best_only:
32
+ return {
33
+ f"{s}_score": pd.read_csv(f"{results_path}/{task}_{s}_score.csv")
34
+ .sort_values(["team", "balanced_accuracy"], ascending=False)
35
+ .drop_duplicates(subset=["team"])
36
+ .sort_values("balanced_accuracy", ascending=False)
37
+ .set_index("team")
38
+ for s in valid_splits
39
+ }
40
+
41
+ else:
42
+ return {
43
+ f"{s}_score": pd.read_csv(f"{results_path}/{task}_{s}_score.csv").set_index("team") for s in valid_splits
44
+ }
45
+
46
+
47
+ @st.cache_data
48
+ def load_submission():
49
+ out = []
50
+ for task in TASKS:
51
+ data = pd.read_csv(f"{results_path}/{task}_submissions.csv")
52
+ data["task"] = task
53
+ out.append(data)
54
+
55
+ return pd.concat(out, ignore_index=True)
56
+
57
+
58
+ @st.cache_data
59
+ def get_updated_time(file="updated.txt"):
60
+ return open(file).read()
61
+
62
+
63
+ @st.cache_data
64
+ def get_volume():
65
+ subs = pd.concat(
66
+ [pd.read_csv(f"{results_path}/{task}_submissions.csv") for task in TASKS],
67
+ ignore_index=True,
68
+ )
69
+ subs["datetime"] = pd.DatetimeIndex(subs["datetime"])
70
+ subs["date"] = subs["datetime"].dt.date
71
+ subs = subs.groupby(["date", "status_reason"]).size().unstack().fillna(0).reset_index()
72
+
73
+ return subs
74
+
75
+
76
+ @st.cache_data
77
+ def make_heatmap(results, label="generated", symbol="🤖"):
78
+
79
+ # Wide-format results: one row per team, one accuracy column per source
80
+ df_long = results.set_index("team")
81
+
82
+ team_order = results["team"].tolist()
83
+ df_long = df_long.loc[:, [c for c in df_long.columns if c.startswith(label) and "accuracy" not in c]]
84
+
85
+ df_long.columns = [c.replace(f"{label}_", "") for c in df_long.columns]
86
+
87
+ if "none" in df_long.columns:
88
+ df_long = df_long.drop(columns=["none"])
89
+
90
+ df_long = df_long.reset_index().melt(id_vars="team", var_name="source", value_name="acc")
91
+ # df_long.rename(columns={'index': 'source'}, inplace=True)
92
+ # df_long
93
+ # return
94
+
95
+ # Base chart for rectangles
96
+ base = alt.Chart(df_long).encode(
97
+ x=alt.X("source:O", title="Source", axis=alt.Axis(orient="top", labelAngle=-60)),
98
+ y=alt.Y("team:O", title="Team", sort=team_order),
99
+ )
100
+
101
+ # Heatmap rectangles
102
+ heatmap = base.mark_rect().encode(
103
+ color=alt.Color("acc:Q", scale=alt.Scale(scheme="greens"), title=f"{label} Accuracy")
104
+ )
105
+
106
+ # Text labels
107
+ text = base.mark_text(baseline="middle", fontSize=16).encode(
108
+ text=alt.Text("acc:Q", format=".2f"),
109
+ color=alt.condition(
110
+ alt.datum.acc < 0.5, # you can tune this for readability
111
+ alt.value("black"),
112
+ alt.value("white"),
113
+ ),
114
+ )
115
+
116
+ # Combine heatmap and text
117
+ chart = (heatmap + text).properties(width=600, height=500, title=f"Accuracy on {symbol} {label} sources heatmap")
118
+
119
+ return chart
120
+
121
+
122
+ @st.cache_data
123
+ def make_roc_curves(task, best_only=False):
124
+
125
+ rocs = pd.read_csv(f"{results_path}/{task}_rocs.csv")
126
+
127
+ # if best_only:
128
+ # rocs = rocs.sort_values(by=["auc"],ascending=False).drop_duplicates("team")
129
+
130
+ roc_chart = (
131
+ alt.Chart(rocs)
132
+ .mark_line()
133
+ .encode(
134
+ x="fpr",
135
+ y="tpr",
136
+ color="team:N",
137
+ detail="submission_id:N"
138
+ )
139
+ )
140
+
141
+ return roc_chart
142
+
143
+
144
+ #####################################################################
145
+ ## Page definition ##
146
+ #####################################################################
147
+
148
+ ## Set title
149
+ st.set_page_config(
150
+ page_title="Leaderboard",
151
+ initial_sidebar_state="collapsed",
152
+ layout="wide", # This makes the app use the full width of the screen
153
+ )
154
+
155
+ ## Pull new results or toggle the private/public view if you are an owner
156
+ with st.sidebar:
157
+
158
+ hf_token = os.getenv("HF_TOKEN")
159
+ password = st.text_input("Admin login:", type="password")
160
+
161
+ if password == hf_token:
162
+ if st.button("Pull New Results"):
163
+ with st.spinner("Pulling new results", show_time=True):
164
+ try:
165
+ process = subprocess.Popen(
166
+ ["python3", "utils.py"],
167
+ stdout=subprocess.PIPE,
168
+ stderr=subprocess.PIPE,
169
+ text=True, # Decode stdout/stderr as text
170
+ )
171
+ st.success(f"Background task started with PID: {process.pid}")
172
+ process.wait()
173
+ process.kill()
174
+ st.success(f"PID {process.pid} finished!")
175
+ # If a user has the right perms, then this clears the cache
176
+ load_results.clear()
177
+ get_volume.clear()
178
+ load_submission.clear()
179
+ st.rerun()
180
+ except Exception as e:
181
+ st.error(f"Error starting background task: {e}")
182
+
183
+ ## Initialize the toggle state in session_state if it doesn't exist
184
+ if "private_view" not in st.session_state:
185
+ st.session_state.private_view = False
186
+
187
+ # Create the toggle widget
188
+ # The 'value' parameter sets the initial state, here linked to session_state
189
+ # The 'key' parameter is crucial for identifying the widget across reruns and linking to session_state
190
+ toggle_value = st.toggle("Private Scores", value=st.session_state.private_view, key="private_view")
191
+
192
+ # The 'toggle_value' variable will hold the current state of the toggle (True or False)
193
+ if toggle_value:
194
+ st.write("Showing **PRIVATE** scores.")
195
+ else:
196
+ st.write("Showing **PUBLIC** scores.")
197
+
198
+ split = "public" if not toggle_value else "private"
199
+ else:
200
+ split = "public"
201
+
202
+
203
+ def show_leaderboard(results, task):
204
+ cols = [
205
+ "generated_accuracy",
206
+ "real_accuracy",
207
+ # "pristine_accuracy",
208
+ "balanced_accuracy",
209
+ "auc",
210
+ "fail_rate",
211
+ "total_time",
212
+ ]
213
+
214
+ # st.dataframe(results[f"{split}_score"])
215
+
216
+ column_config = {
217
+ "balanced_accuracy": st.column_config.NumberColumn(
218
+ "⚖️ Balanced Accuracy",
219
+ format="compact",
220
+ min_value=0,
221
+ pinned=True,
222
+ max_value=1.0,
223
+ # width="small",
224
+ ),
225
+ "generated_accuracy": st.column_config.NumberColumn(
226
+ "🤖 True Positive Rate",
227
+ format="compact",
228
+ min_value=0,
229
+ pinned=True,
230
+ max_value=1.0,
231
+ # width="small",
232
+ ),
233
+ "real_accuracy": st.column_config.NumberColumn(
234
+ "🧑‍🎤 True Negative Rate",
235
+ format="compact",
236
+ min_value=0,
237
+ pinned=True,
238
+ max_value=1.0,
239
+ # width="small",
240
+ ),
241
+ "auc": st.column_config.NumberColumn(
242
+ "📐 AUC",
243
+ format="compact",
244
+ min_value=0,
245
+ pinned=True,
246
+ max_value=1.0,
247
+ # width="small",
248
+ ),
249
+ "fail_rate": st.column_config.NumberColumn(
250
+ "❌ Fail Rate",
251
+ format="compact",
252
+ # width="small",
253
+ ),
254
+ "total_time": st.column_config.NumberColumn(
255
+ "🕒 Inference Time",
256
+ format="compact",
257
+ # width="small",
258
+ ),
259
+ }
260
+
261
+ labels = {"real": "🧑‍🎤", "generated": "🤖"}
262
+
263
+ for c in results[f"{split}_score"].columns:
264
+ if "accuracy" in c:
265
+ continue
266
+ if any(p in c for p in ["generated", "real"]):
267
+ s = c.split("_")
268
+ pred = s[0]
269
+ source = " ".join(s[1:])
270
+ column_config[c] = st.column_config.NumberColumn(
271
+ labels[pred] + " " + source,
272
+ help=c,
273
+ format="compact",
274
+ min_value=0,
275
+ max_value=1.0,
276
+ )
277
+
278
+ "#### Summary"
279
+ st.dataframe(results[f"{split}_score"].loc[:, cols], column_config=column_config)
280
+
281
+ cond_bacc = st.toggle("Conditional Balanced Accuracy", value=False, key=f"cond_bacc_{task}")
282
+
283
+
284
+
285
+
286
+
287
+ cols = [c for c in results[f"{split}_score"].columns if "generated_" in c and "accuracy" not in c]
288
+ temp = results[f"{split}_score"].loc[:, cols].copy()
289
+
290
+
291
+ if cond_bacc:
292
+ tnr = results[f"{split}_score"].loc[:, ["real_accuracy"]]
293
+ temp[:] = (temp.values + tnr.values)/2.
294
+ "#### 🤖 Balanced Accuracy | Generated Source"
295
+ else:
296
+ "#### 🤖 True Positive Rate | Generated Source"
297
+
298
+ st.dataframe(temp, column_config=column_config)
299
+
300
+
301
+
302
+
303
+ cols = [c for c in results[f"{split}_score"].columns if "real_" in c and "accuracy" not in c]
304
+ temp = results[f"{split}_score"].loc[:, cols].copy()
305
+
306
+ if cond_bacc:
307
+ tpr = results[f"{split}_score"].loc[:, ["generated_accuracy"]]
308
+ temp[:] = (temp.values + tpr.values)/2.
309
+ "#### 🧑‍🎤 Balanced Accuracy | Real Source"
310
+ else:
311
+ "#### 🧑‍🎤 True Negative Rate | Real Source"
312
+
313
+
314
+ st.dataframe(temp, column_config=column_config)
315
+
316
+
317
+ def make_roc(results):
318
+ results["FA"] = 1.0 - results["real_accuracy"]
319
+
320
+ chart = (
321
+ alt.Chart(results)
322
+ .mark_circle()
323
+ .encode(
324
+ x=alt.X("FA:Q", title="🧑‍🎤 False Positive Rate", scale=alt.Scale(domain=[0.0, 1.0])),
325
+ y=alt.Y("generated_accuracy:Q", title="🤖 True Positive Rate", scale=alt.Scale(domain=[0.0, 1.0])),
326
+ color="team:N", # Color by categorical field
327
+ size=alt.Size(
328
+ "total_time:Q", title="🕒 Inference Time", scale=alt.Scale(rangeMin=100)
329
+ ), # Size by quantitative field
330
+ )
331
+ .properties(width=400, height=400, title="Detection vs False Alarm vs Inference Time")
332
+ )
333
+
334
+ diag_line = (
335
+ alt.Chart(pd.DataFrame(dict(tpr=[0, 1], fpr=[0, 1])))
336
+ .mark_line(color="lightgray", strokeDash=[8, 4])
337
+ .encode(x="fpr", y="tpr")
338
+ )
339
+
340
+ return chart + diag_line
341
+
342
+
343
+ def make_acc(results):
344
+ # results["FA"] = 1. - results["pristine_accuracy"]
345
+ # results = results[results["total_time"] >= 0]
346
+ # results["total_time"] = results["total_time"]
347
+
348
+ results = results.loc[results["total_time"] >= 0]
349
+
350
+ chart = (
351
+ alt.Chart(results)
352
+ .mark_circle(size=200)
353
+ .encode(
354
+ x=alt.X("total_time:Q", title="🕒 Inference Time", scale=alt.Scale(domain=[0.0, 10000])),
355
+ y=alt.Y(
356
+ "balanced_accuracy:Q",
357
+ title="Balanced Accuracy",
358
+ scale=alt.Scale(domain=[0.4, 1]),
359
+ ),
360
+ color="team:N",  # Color by categorical field
361
+ )
362
+ .properties(width=400, height=400, title="Inference Time vs Balanced Accuracy")
363
+ )
364
+ diag_line = (
365
+ alt.Chart(pd.DataFrame(dict(t=[0, results["total_time"].max()], y=[0.5, 0.5])))
366
+ .mark_line(color="lightgray", strokeDash=[8, 4])
367
+ .encode(x="t", y="y")
368
+ )
369
+ return chart + diag_line
370
+
371
+
372
+ def get_heatmaps(temp):
373
+ h1 = make_heatmap(temp, "generated", symbol="🤖")
374
+ h2 = make_heatmap(temp, "real", symbol="🧑‍🎤")
375
+
376
+ st.altair_chart(h1, use_container_width=True)
377
+ st.altair_chart(h2, use_container_width=True)
378
+
379
+ if temp.columns.str.contains("aug", case=False).any():
380
+ h3 = make_heatmap(temp, "aug", symbol="🛠️")
381
+ st.altair_chart(h3, use_container_width=True)
382
+
383
+
384
+ def make_plots_for_task(task, split, best_only):
385
+ results = load_results(task, best_only=best_only)
386
+ # results1[f"{split}_score"]
387
+ temp = results[f"{split}_score"].reset_index()
388
+
389
+ # st.write(temp)
390
+
391
+ t1, t2 = st.tabs(["Tables", "Charts"])
392
+ with t1:
393
+ show_leaderboard(results, task)
394
+
395
+ with t2:
396
+
397
+ roc_scatter = make_roc(temp)
398
+ acc_vs_time = make_acc(temp)
399
+
400
+ if split == "private" and hf_token is not None:
401
+ # with t2:
402
+ full_curves = st.toggle("full curves", value=True, key=f"all curves {task}")
403
+
404
+ if full_curves:
405
+ roc_scatter = make_roc_curves(task, best_only) + roc_scatter
406
+
407
+ st.altair_chart(roc_scatter | acc_vs_time, use_container_width=False)
408
+ else:
409
+ # with t2:
410
+ st.altair_chart(roc_scatter | acc_vs_time, use_container_width=False)
411
+
412
+ # with t3:
413
+ # get_heatmaps(temp)
414
+
415
+
416
+ updated = get_updated_time()
417
+ st.markdown(updated)
418
+ # st.markdown("#### Detailed Public Leaderboard")
419
+ # st.markdown("[SAFE: Synthetic Audio Forensics Evaluation Challenge](https://stresearch.github.io/SAFE/)")
420
+ best_only = True # st.toggle("Only Best per Team", value=True)
421
+ # show_chart = st.toggle("Show Table", value=True)
422
+
423
+
424
+ tp, t1, volume_tab, all_submission_tab = st.tabs(["**Pilot Task**","**Task 1**", "**Submission Volume**", "**All Submissions**"])
425
+ with tp:
426
+ "*Detection of Synthetic Video Content. Video files are unmodified from the original output of the models or the real sources.*"
427
+ make_plots_for_task(TASKS[0], split, best_only)
428
+ with t1:
429
+ "*Detection of Synthetic Video Content. Video files are unmodified from the original output of the models or the real sources.*"
430
+ make_plots_for_task(TASKS[1], split, best_only)
431
+
432
+ with volume_tab:
433
+ subs = get_volume()
434
+ status_lookup = "QUEUED,PROCESSING,SUCCESS,FAILED".split(",")
435
+ found_columns = subs.columns.values.tolist()
436
+ status_lookup = list(set(status_lookup) & set(found_columns))
437
+ st.bar_chart(subs, x="date", y=status_lookup, stack=True)
438
+
439
+ total_submissions = int(subs.loc[:, status_lookup].fillna(0).values.sum())
440
+ st.metric("Total Submissions", value=total_submissions)
441
+
442
+ st.metric("Duration", f'{(subs["date"].max() - subs["date"].min()).days} days')
443
+
444
+ if split == "private":
445
+ with all_submission_tab:
446
+ data = load_submission()
447
+ st.dataframe(data)
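For context on the `best_only` branch of `load_results` above: it keeps one row per team by sorting on `balanced_accuracy` and dropping duplicates. A minimal sketch of that dedup on a toy frame (team names and scores below are made up; only the column names match the cached CSVs):

```python
import pandas as pd

# Toy stand-in for one of the cached "<task>_<split>_score.csv" files.
scores = pd.DataFrame(
    {
        "team": ["alpha", "alpha", "beta"],
        "balanced_accuracy": [0.71, 0.83, 0.64],
    }
)

best = (
    scores.sort_values(["team", "balanced_accuracy"], ascending=False)
    .drop_duplicates(subset=["team"])        # first row per team is its best score
    .sort_values("balanced_accuracy", ascending=False)
    .set_index("team")
)
print(best)  # alpha -> 0.83, beta -> 0.64
```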
index.html DELETED
@@ -1,19 +0,0 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
metric.py ADDED
@@ -0,0 +1,207 @@
1
+ import json
2
+ import re
3
+ import pandas as pd
4
+ from huggingface_hub import hf_hub_download
5
+ from sklearn.metrics import roc_auc_score, roc_curve
6
+ import numpy as np
7
+
8
+
9
+ def compute_roc(solution_df):
10
+
11
+ ## fix weird submissions
12
+ if isinstance(solution_df.iloc[0]["score"], str):
13
+ solution_df.loc[:, "score"] = solution_df.loc[:, "score"].apply(
14
+ lambda a: float(
15
+ # np.array(json.loads(re.sub(r"\b(\d+)\.(?!\d)", r"\1.0", a))).squeeze()
16
+ np.array(json.loads(re.sub(r"\b(\d+)\.(?!\d)", r"\1.0", a))).squeeze()
17
+ if isinstance(a, str)
18
+ else float("nan")
19
+ )
20
+ )
21
+
22
+ isna = solution_df["score"].isna()
23
+
24
+ if isna.all():
25
+ ## if all nans
26
+ return -1
27
+
28
+ solution_df = solution_df.loc[~isna]
29
+ auc = roc_auc_score(solution_df["pred"] == "generated", solution_df["score"])
30
+ return auc
31
+
32
+
33
+ def compute_roc_curve(solution_df, keep_every: int = 10):
34
+
35
+ ## fix weird submissions
36
+ if isinstance(solution_df.iloc[0]["score"], str):
37
+ solution_df.loc[:, "score"] = solution_df.loc[:, "score"].apply(
38
+ lambda a: float(
39
+ # np.array(json.loads(re.sub(r"\b(\d+)\.(?!\d)", r"\1.0", a))).squeeze()
40
+ np.array(json.loads(re.sub(r"\b(\d+)\.(?!\d)", r"\1.0", a))).squeeze()
41
+ if isinstance(a, str)
42
+ else float("nan")
43
+ )
44
+ )
45
+
46
+ isna = solution_df["score"].isna()
47
+
48
+ if isna.all():
49
+ ## if all nans
50
+ return {"fpr": [], "tpr": [], "threshold": []}
51
+
52
+ solution_df = solution_df.loc[~isna]
53
+ fpr, tpr, threshold = roc_curve(solution_df["pred"] == "generated", solution_df["score"])
54
+ if len(fpr) < keep_every:
55
+ return {"fpr": fpr.tolist(), "tpr": tpr.tolist(), "threshold": threshold.tolist()}
56
+
57
+ # Sample every keep_every
58
+ return {
59
+ "fpr": fpr.tolist()[::keep_every],
60
+ "tpr": tpr.tolist()[::keep_every],
61
+ "threshold": threshold.tolist()[::keep_every],
62
+ }
63
+
64
+
65
+ def _metric(solution_df, submission_df, mode="top_level", full: bool = False):
66
+ """
67
+ Score a submission against the solution: per-source accuracy, balanced accuracy, and (when scores are provided) AUC.
68
+
69
+ Parameters
70
+ ----------
71
+ solution_df : pandas.DataFrame
72
+ The dataframe containing the solution data.
73
+ submission_df : pandas.DataFrame
74
+ The dataframe containing the submission data.
75
+ mode : str, optional
76
+ The mode of evaluation. "top_level" keeps only the summary metrics; any other value (e.g. "detailed") also keeps the per-source scores. The default is "top_level".
77
+ full: bool, optional
78
+ Full evaluation mode breaks up scores by source (both anonymized and original)
79
+
80
+ Returns
81
+ -------
82
+ None.
83
+ """
84
+
85
+ ## Allocate space
86
+ evaluation = {}
87
+
88
+ ## Ensure alignment of keys and group relevant columns
89
+ solution_df["submission_pred"] = solution_df.join(submission_df, lsuffix="_solution", rsuffix="_submission")[
90
+ "pred_submission"
91
+ ].values
92
+ cols = ["split", "pred", "source", "source_og"]
93
+ solution_df["correct"] = solution_df["pred"] == solution_df["submission_pred"]
94
+ accuracy = solution_df.groupby(cols)["correct"].mean().to_frame("accuracy").reset_index()
95
+ accuracy["score_name"] = accuracy["pred"] + "_" + accuracy["source"]
96
+
97
+ ## Create public dataframe and private dataframe
98
+ public_df = accuracy.query(f"split=='public'").copy()
99
+ private_df = accuracy.copy()
100
+ private_df["score_name"] = private_df["pred"] + "_" + private_df["source_og"]
101
+
102
+
103
+ ## Perform a loop over categories for reported metrics
104
+ for split, temp in zip(["public", "private"], [public_df, private_df]):
105
+ scores_by_source = temp.set_index("score_name")["accuracy"].sort_index()
106
+ scores_by_source["generated_accuracy"] = temp.query("pred=='generated'")["accuracy"].mean()
107
+ scores_by_source["real_accuracy"] = temp.query("pred=='real'")["accuracy"].mean()
108
+ scores_by_source["balanced_accuracy"] = (
109
+ scores_by_source["generated_accuracy"] + scores_by_source["real_accuracy"]
110
+ ) / 2.0
111
+ if mode == "top_level":
112
+ scores_to_save = ["generated_accuracy", "real_accuracy", "balanced_accuracy"]
113
+ evaluation[f"{split}_score"] = scores_by_source.loc[scores_to_save].to_dict()
114
+ else:
115
+ evaluation[f"{split}_score"] = scores_by_source.to_dict()
116
+
117
+ ## Compute by source - anonymized and original
118
+ # if full:
119
+ # evaluation[f"{split}_score"]["anon_source"] = temp.groupby("source")["accuracy"].mean().to_dict()
120
+ # evaluation[f"{split}_score"]["original_source"] = temp.groupby("source_og")["accuracy"].mean().to_dict()
121
+
122
+ ## Save data split
123
+ evaluation["public_score"]["proportion"] = len(solution_df.query(f"split=='public'").copy()) / len(solution_df)
124
+ evaluation["private_score"]["proportion"] = 1.0
125
+
126
+ ## Compute AUC
127
+ if "score" in submission_df.columns:
128
+ solution_df["score"] = submission_df["score"]
129
+
130
+ ## Public
131
+ split = "public"
132
+ temp = solution_df.query(f"split=='{split}'").copy()
133
+ try:
134
+ auc = compute_roc(temp)
135
+ except Exception as e:
136
+ print("failed auc")
137
+ print(e)
138
+ auc = "nan"
139
+ evaluation[f"{split}_score"]["auc"] = float(auc)
140
+ evaluation[f"{split}_score"]["fail_rate"] = float(temp["score"].isna().mean())
141
+
142
+ ## Private
143
+ split = "private"
144
+ temp = solution_df.copy()
145
+ try:
146
+ auc = compute_roc(temp)
147
+ except Exception as e:
148
+ print("failed auc")
149
+ print(e)
150
+ auc = "nan"
151
+ evaluation[f"{split}_score"]["auc"] = float(auc)
152
+ evaluation[f"{split}_score"]["fail_rate"] = float(temp["score"].isna().mean())
153
+
154
+ ## Full data computations
155
+ if not full:
156
+ return evaluation
157
+
158
+ ## Roc
159
+ if "score" in submission_df.columns:
160
+ solution_df["score"] = submission_df["score"]
161
+
162
+ ## Public
163
+ split = "public"
164
+ temp = solution_df.query(f"split=='{split}'").copy()
165
+ try:
166
+ roc_data = compute_roc_curve(temp)
167
+ except Exception as e:
168
+ print("failed roc")
169
+ print(e)
170
+ roc_data = {"fpr": [], "tpr": [], "threshold": []}
171
+ evaluation[f"{split}_score"]["roc"] = roc_data
172
+
173
+ ## Private
174
+ split = "private"
175
+ temp = solution_df.copy()
176
+ try:
177
+ roc_data = compute_roc_curve(temp)
178
+ except Exception as e:
179
+ print("failed roc")
180
+ print(e)
181
+ roc_data = {"fpr": [], "tpr": [], "threshold": []}
182
+ evaluation[f"{split}_score"]["roc"] = roc_data
183
+
184
+ return evaluation
185
+
186
+
187
+ def compute(params):
188
+ solution_file = hf_hub_download(
189
+ repo_id=params.competition_id,
190
+ filename="solution.csv",
191
+ token=params.token,
192
+ repo_type="dataset",
193
+ )
194
+
195
+ solution_df = pd.read_csv(solution_file).set_index(params.submission_id_col)
196
+
197
+ submission_filename = f"submissions/{params.team_id}-{params.submission_id}.csv"
198
+ submission_file = hf_hub_download(
199
+ repo_id=params.competition_id,
200
+ filename=submission_filename,
201
+ token=params.token,
202
+ repo_type="dataset",
203
+ )
204
+
205
+ submission_df = pd.read_csv(submission_file).set_index(params.submission_id_col)
206
+
207
+ return _metric(solution_df, submission_df)
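A hedged usage sketch of `_metric` on a tiny synthetic solution/submission pair. The column names (`split`, `pred`, `source`, `source_og`, `score`, and the `id` index) are inferred from the code above; every value below is invented for illustration, and real competition files are far larger:

```python
import pandas as pd
from metric import _metric

# Four labeled items: two public, two private (values made up).
solution = pd.DataFrame(
    {
        "id": [0, 1, 2, 3],
        "split": ["public", "public", "private", "private"],
        "pred": ["generated", "real", "generated", "real"],
        "source": ["model_a", "camera_a", "model_b", "camera_b"],        # anonymized source labels
        "source_og": ["gen_model_1", "web_video", "gen_model_2", "web_video"],
    }
).set_index("id")

# A submission that misses the private generated item (id=2).
submission = pd.DataFrame(
    {
        "id": [0, 1, 2, 3],
        "pred": ["generated", "real", "real", "real"],
        "score": [0.9, 0.2, 0.4, 0.1],
    }
).set_index("id")

evaluation = _metric(solution, submission)
print(evaluation["public_score"]["balanced_accuracy"])   # 1.0  (both public items correct)
print(evaluation["private_score"]["balanced_accuracy"])  # 0.75 (generated accuracy 0.5, real accuracy 1.0)
```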
pyproject.toml ADDED
@@ -0,0 +1,15 @@
1
+ [project]
2
+ name = "leaderboard"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "streamlit",
9
+ "pandas",
10
+ "altair",
11
+ "scikit-learn",
12
+ "huggingface_hub",
13
+ "vl-convert-python",
14
+ "hf_transfer"
15
+ ]
requirements.txt ADDED
@@ -0,0 +1,4 @@
1
+ scikit-learn
2
+ numpy
3
+ streamlit
4
+ huggingface_hub
style.css DELETED
@@ -1,28 +0,0 @@
1
- body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
- }
5
-
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
9
- }
10
-
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
16
- }
17
-
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
24
- }
25
-
26
- .card p:last-child {
27
- margin-bottom: 0;
28
- }
test.sh ADDED
@@ -0,0 +1 @@
1
+ HF_TOKEN=test streamlit run app.py
updated.txt ADDED
@@ -0,0 +1 @@
1
+ Updated on 2025-08-06 11:28:08 EST
utils.py ADDED
@@ -0,0 +1,303 @@
1
+ import json
2
+ from datetime import datetime
3
+ from pathlib import Path
4
+ from huggingface_hub import snapshot_download
5
+ import tqdm.auto as tqdm
6
+ from typing import Any, Dict, List, Tuple
7
+ from collections import defaultdict
8
+ from metric import _metric
9
+ import os
10
+ import pandas as pd
11
+
12
+ os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
13
+ os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "20"
14
+ COMP_CACHE = os.environ.get("COMP_CACHE", "./competition_cache")
15
+
16
+
17
+ def download_competition_data(competition_names: List[str]) -> None:
18
+ """Download a local snapshot of each competition dataset repo into COMP_CACHE."""
19
+ for repo_id in tqdm.tqdm(competition_names):
20
+ snapshot_download(
21
+ repo_id=repo_id,
22
+ local_dir=os.path.join(COMP_CACHE, repo_id),
23
+ repo_type="dataset",
24
+ token=os.environ.get("HF_TOKEN"),
25
+ )
26
+
27
+
28
+ STATUS_MAP = {0: "PENDING", 1: "QUEUED", 2: "PROCESSING", 3: "SUCCESS", 4: "FAILED"}
29
+
30
+ ## Make a directory to store computed results
31
+ os.makedirs(Path("competition_cache") / "cached_results", exist_ok=True)
32
+
33
+
34
+ def load_teams(competition_space_path: Path) -> pd.DataFrame:
35
+ team_file_name = "teams.json"
36
+ return pd.read_json(Path(competition_space_path) / team_file_name).T
37
+
38
+
39
+ def json_to_dataframe(data, extra_column_name=None, extra_column_value=None):
40
+ flat_data = []
41
+ for entry in data:
42
+ original_flat_entry = {**entry}
43
+ flat_entry = {k: v for k, v in original_flat_entry.items() if "score" not in k}
44
+ times = {
45
+ k.replace("score", "time"): v.get("total_time", -1) for k, v in original_flat_entry.items() if "score" in k
46
+ }
47
+ flat_entry.update(times)
48
+ if extra_column_name:
49
+ flat_entry[extra_column_name] = extra_column_value
50
+ flat_data.append(flat_entry)
51
+ df = pd.DataFrame(flat_data)
52
+ return df
53
+
54
+
55
+ def load_submission_map(competition_space_path: Path) -> Tuple[Dict[str, str], pd.DataFrame]:
56
+ submission_info_dir = "submission_info"
57
+ submission_info_files = list((Path(competition_space_path) / submission_info_dir).glob("*.json"))
58
+
59
+ # Loop and collect submission IDs by team
60
+ team_submissions: Dict[str, str] = {}
61
+ submission_summaries: List[pd.DataFrame] = []
62
+ for file in submission_info_files:
63
+ with open(file, "r") as fn:
64
+ json_data = json.load(fn)
65
+ submission_summaries.append(
66
+ json_to_dataframe(
67
+ data=json_data["submissions"], extra_column_name="team_id", extra_column_value=json_data["id"]
68
+ )
69
+ )
70
+ submission_list = pd.read_json(file).submissions.values.tolist()
71
+ for submission in submission_list:
72
+ team_submissions[submission["submission_id"]] = submission["submitted_by"]
73
+ submission_summary = pd.concat(submission_summaries, axis=0)
74
+ submission_summary["status_reason"] = submission_summary["status"].apply(lambda x: STATUS_MAP[x])
75
+ return team_submissions, submission_summary
76
+
77
+
78
+ def get_member_to_team_map(teams: pd.DataFrame, team_submissions: Dict[str, str]) -> Dict[str, str]:
79
+ member_map: Dict[str, str] = {}
80
+ for member_id in team_submissions.values():
81
+ member_map[member_id] = teams[teams.members.apply(lambda x: member_id in x)].id.values[0]
82
+ return member_map
83
+
84
+
85
+ def load_submissions(competition_space_path: Path) -> Dict[str, Dict[str, pd.DataFrame]]:
86
+ submission_dir = "submissions"
87
+ submissions: Dict[str, Dict[str, pd.DataFrame]] = defaultdict(dict)
88
+ for file in list((Path(competition_space_path) / submission_dir).glob("*.csv")):
89
+ file_name = str(file).split("/")[-1].split(".")[0]
90
+ team_id = "-".join(file_name.split("/")[-1].split("-")[:5])
91
+ sub_id = "-".join(file_name.split("/")[-1].split("-")[5:])
92
+ submissions[team_id][sub_id] = pd.read_csv(file).set_index("id")
93
+ return submissions
94
+
95
+
96
+ def compute_metric_per_team(solution_df: pd.DataFrame, team_submissions: Dict[str, pd.DataFrame]) -> Dict[str, Any]:
97
+ results: Dict[str, Any] = {}
98
+ for submission_id, submission in team_submissions.items():
99
+ results[submission_id] = _metric(solution_df=solution_df, submission_df=submission, mode="detailed", full=True)
100
+ return results
101
+
102
+
103
+ def prep_public(public_results: Dict[str, Any]) -> Dict[str, Any]:
104
+ new: Dict[str, Any] = {}
105
+ for key, value in public_results.items():
106
+ # if key == "anon_source":
107
+ # for sub_key, sub_value in value.items():
108
+ # sub_key = ("generated" if sub_key[0] == "g" else "real") + "_" + sub_key.split("_")[-1]
109
+ # new[sub_key] = sub_value
110
+ # continue
111
+ if key in ["proportion", "roc", "original_source"]:
112
+ continue
113
+ new[key] = value
114
+ return new
115
+
116
+
117
+ def prep_private(private_results: Dict[str, Any]) -> Dict[str, Any]:
118
+ new: Dict[str, Any] = {}
119
+ for key, value in private_results.items():
120
+ # if key == "original_source":
121
+ # for sub_key, sub_value in value.items():
122
+ # sub_key = ("real" if sub_key in REAL_MAP else "generated") + "_" + sub_key
123
+ # new[sub_key] = sub_value
124
+ # continue
125
+ if key in ["proportion", "roc", "anon_source"]:
126
+ continue
127
+ new[key] = value
128
+ return new
129
+
130
+
131
+ def extract_roc(results: Dict[str, Any]) -> Dict[str, Any]:
132
+ new: Dict[str, Any] = {}
133
+ for key, value in results.items():
134
+ if key in ["roc"]:
135
+ for sub_key, sub_value in value.items():
136
+ new[sub_key] = sub_value
137
+ continue
138
+ if key in ["auc"]:
139
+ new[key] = value
140
+ return new
141
+
142
+
143
+ if __name__ == "__main__":
144
+
145
+ ## Download data
146
+ spaces: List[str] = ["safe-challenge/video-challenge-pilot-config", "safe-challenge/video-challenge-task-1-config"]
147
+ download_competition_data(competition_names=spaces)
148
+
149
+ ## Loop
150
+ for space in spaces:
151
+ local_dir = Path("competition_cache") / space
152
+
153
+ ## Load relevant data
154
+ teams = load_teams(competition_space_path=local_dir)
155
+ team_submissions, submission_summaries = load_submission_map(competition_space_path=local_dir)
156
+ member_map = get_member_to_team_map(teams=teams, team_submissions=team_submissions)
157
+ submissions = load_submissions(competition_space_path=local_dir)
158
+
159
+ ## Load solutions
160
+ solutions_df = pd.read_csv(local_dir / "solution.csv").set_index("id")
161
+
162
+ ## Loop and save by team
163
+ public, private, rocs = [], [], []
164
+ for team_id, submission_set in submissions.items():
165
+ results = compute_metric_per_team(solution_df=solutions_df, team_submissions=submission_set)
166
+ public_results = {
167
+ key: prep_public(value["public_score"]) for key, value in results.items() if key in team_submissions
168
+ }
169
+ private_results = {
170
+ key: prep_private(value["private_score"]) for key, value in results.items() if key in team_submissions
171
+ }
172
+
173
+
174
+ ## Add timing
175
+ public_times = {
176
+ x["submission_id"]: x["public_time"]
177
+ for x in submission_summaries[submission_summaries["submission_id"].isin(results.keys())][
178
+ ["submission_id", "public_time"]
179
+ ].to_dict(orient="records")
180
+ }
181
+ private_times = {
182
+ x["submission_id"]: x["private_time"]
183
+ for x in submission_summaries[submission_summaries["submission_id"].isin(results.keys())][
184
+ ["submission_id", "private_time"]
185
+ ].to_dict(orient="records")
186
+ }
187
+ for key in public_results.keys():
188
+ public_results[key]["total_time"] = public_times[key]
189
+ for key in private_results.keys():
190
+ private_results[key]["total_time"] = private_times[key]
191
+
192
+ ## Roc computations
193
+ roc_results = {
194
+ key: extract_roc(value["private_score"]) for key, value in results.items() if key in team_submissions
195
+ }
196
+ roc_df = pd.json_normalize(roc_results.values())
197
+ if len(roc_df) == 0:
198
+ continue
199
+ roc_df.insert(loc=0, column="submission_id", value=roc_results.keys())
200
+ roc_df.insert(
201
+ loc=0,
202
+ column="team",
203
+ value=[
204
+ teams[teams.id == member_map[team_submissions[submission_id]]].name.values[0]
205
+ for submission_id in roc_results.keys()
206
+ ],
207
+ )
208
+ roc_df.insert(
209
+ loc=0,
210
+ column="submission_repo",
211
+ value=[
212
+ submission_summaries[
213
+ submission_summaries.team_id == member_map[team_submissions[submission_id]]
214
+ ].submission_repo.values[0]
215
+ for submission_id in roc_results.keys()
216
+ ],
217
+ )
218
+ roc_df["label"] = roc_df.apply(
219
+ lambda x: f"AUC: {round(x['auc'], 2)} - {x['team']} - {x['submission_repo']}", axis=1
220
+ )
221
+ rocs.append(roc_df)
222
+
223
+ ## Append results to save in cache
224
+ public_df = pd.json_normalize(public_results.values())
225
+ public_df.insert(
226
+ loc=0,
227
+ column="submission",
228
+ value=[
229
+ teams[teams.id == member_map[team_submissions[submission_id]]].name.values[0]
230
+ for submission_id in public_results.keys()
231
+ ],
232
+ )
233
+ public_df.insert(
234
+ loc=0,
235
+ column="team",
236
+ value=[
237
+ teams[teams.id == member_map[team_submissions[submission_id]]].name.values[0]
238
+ for submission_id in public_results.keys()
239
+ ],
240
+ )
241
+ public_df.insert(
242
+ loc=0,
243
+ column="team_id",
244
+ value=[
245
+ teams[teams.id == member_map[team_submissions[submission_id]]].id.values[0]
246
+ for submission_id in public_results.keys()
247
+ ],
248
+ )
249
+ public.append(public_df)
250
+
251
+ ## Private results
252
+ private_df = pd.json_normalize(private_results.values())
253
+ private_df.insert(
254
+ loc=0,
255
+ column="submission",
256
+ value=[
257
+ teams[teams.id == member_map[team_submissions[submission_id]]].name.values[0]
258
+ for submission_id in private_results.keys()
259
+ ],
260
+ )
261
+ private_df.insert(
262
+ loc=0,
263
+ column="team",
264
+ value=[
265
+ teams[teams.id == member_map[team_submissions[submission_id]]].name.values[0]
266
+ for submission_id in private_results.keys()
267
+ ],
268
+ )
269
+ private_df.insert(
270
+ loc=0,
271
+ column="team_id",
272
+ value=[
273
+ teams[teams.id == member_map[team_submissions[submission_id]]].id.values[0]
274
+ for submission_id in private_results.keys()
275
+ ],
276
+ )
277
+ private.append(private_df)
278
+
279
+ ## Save as csvs
280
+ public = pd.concat(public, axis=0).sort_values(by="balanced_accuracy", ascending=False)
281
+ private = pd.concat(private, axis=0).sort_values(by="balanced_accuracy", ascending=False)
282
+ rocs = pd.concat(rocs, axis=0).explode(["tpr", "fpr", "threshold"], ignore_index=True)
283
+ public.to_csv(
284
+ Path("competition_cache") / "cached_results" / f"{str(local_dir).split('/')[-1]}_public_score.csv",
285
+ index=False,
286
+ )
287
+ private.to_csv(
288
+ Path("competition_cache") / "cached_results" / f"{str(local_dir).split('/')[-1]}_private_score.csv",
289
+ index=False,
290
+ )
291
+ rocs.to_csv(
292
+ Path("competition_cache") / "cached_results" / f"{str(local_dir).split('/')[-1]}_rocs.csv", index=False
293
+ )
294
+ submission_summaries.to_csv(
295
+ Path("competition_cache") / "cached_results" / f"{str(local_dir).split('/')[-1]}_submissions.csv",
296
+ index=False,
297
+ )
298
+
299
+ ## Update time
300
+ now = datetime.now()
301
+ formatted = now.strftime("Updated on %Y-%m-%d %H:%M:%S EST")
302
+ with open("updated.txt", "w") as file:
303
+ file.write(formatted)
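One convention worth calling out: `load_submissions` in utils.py recovers the team and submission ids from each CSV filename, mirroring the `submissions/{team_id}-{submission_id}.csv` pattern used in metric.py's `compute`. A minimal sketch, assuming both ids are standard five-group UUIDs (the concrete values below are invented):

```python
# "<team_id>-<submission_id>.csv" with the ".csv" extension already stripped.
file_name = "1b4e28ba-2fa1-11d2-883f-0016d3cca427-9f8c7d6e-aaaa-bbbb-cccc-444455556666"

parts = file_name.split("-")
team_id = "-".join(parts[:5])   # first five dash-separated groups -> team UUID
sub_id = "-".join(parts[5:])    # remaining groups -> submission UUID

print(team_id)  # 1b4e28ba-2fa1-11d2-883f-0016d3cca427
print(sub_id)   # 9f8c7d6e-aaaa-bbbb-cccc-444455556666
```

The fixed five-group split only holds if both ids are UUIDs with no extra dashes elsewhere in the filename, which the cached competition repos appear to guarantee.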