loodvanniekerkginkgo committed
Commit d6a0c44 · Parent: 393870b

Added new validation for very high Spearman correlations

Files changed (6):
  1. about.py +2 -1
  2. app.py +20 -5
  3. constants.py +9 -9
  4. evaluation.py +152 -0
  5. submit.py +2 -0
  6. validation.py +16 -3
about.py CHANGED
@@ -155,7 +155,8 @@ We may release private test set results at intermediate points during the compet
 ## Cross-validation
 
 For the cross-validation metrics (if training only on the GDPa1 dataset), use the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column to split the dataset into folds and make predictions for each of the folds.
-Submit a CSV file in the same format but also containing the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column.
+Submit a CSV file in the same format but also containing the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column.
+We will be releasing a tutorial on cross-validation shortly.
 
 Submissions close on **1 November 2025**.
 """
app.py CHANGED
@@ -170,8 +170,8 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
 
     with gr.TabItem(SUBMIT_TAB_NAME, elem_id="boundary-benchmark-tab-table"):
         gr.Markdown(SUBMIT_INTRUCTIONS)
-        submission_type_state = gr.State(value="GDPa1")
-        download_file_state = gr.State(value=EXAMPLE_FILE_DICT["GDPa1"])
+        submission_type_state = gr.State(value="GDPa1_cross_validation")
+        download_file_state = gr.State(value=EXAMPLE_FILE_DICT["GDPa1_cross_validation"])
 
         with gr.Row():
             with gr.Column():
@@ -204,16 +204,31 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
                     placeholder="Enter your registration code",
                     info="If you did not receive a registration code, please sign up on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:[email protected]'>[email protected]</a>.",
                 )
+
+                # Extra validation / warning
+                # Add the conditional warning checkbox
+                high_corr_warning = gr.Markdown(
+                    value="",
+                    visible=False,
+                    elem_classes=["warning-box"]
+                )
+                high_corr_checkbox = gr.Checkbox(
+                    label="I understand this may be overfitting",
+                    value=False,
+                    visible=False,
+                    info="This checkbox will appear if your submission shows suspiciously high correlations (>0.9).",
+                )
+
             with gr.Column():
                 submission_type_dropdown = gr.Dropdown(
                     choices=["GDPa1", "GDPa1_cross_validation", "Heldout Test Set"],
-                    value="GDPa1",
+                    value="GDPa1_cross_validation",
                     label="Submission Type",
                     info=f"Choose the dataset corresponding to the track you're participating in. See the '{ABOUT_TAB_NAME}' tab for details.",
                 )
                 download_button = gr.DownloadButton(
                     label="📥 Download example submission CSV for GDPa1",
-                    value=EXAMPLE_FILE_DICT["GDPa1"],
+                    value=EXAMPLE_FILE_DICT["GDPa1_cross_validation"],
                     variant="secondary",
                 )
                 submission_file = gr.File(label="Submission CSV")
@@ -291,4 +306,4 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch(ssr_mode=False)
+    demo.launch(ssr_mode=False, share=True)
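The hunk above adds the warning components, but their event wiring falls outside this diff. One plausible way to toggle them from a validation callback (a hypothetical handler, not part of this commit) would be:

```python
import gradio as gr

# Hypothetical wiring (not in this commit): show the warning and checkbox
# only when the submission's best Spearman on the public set exceeds 0.9.
def toggle_high_corr_warning(max_spearman: float):
    flagged = max_spearman > 0.9
    message = (
        "⚠️ Correlations above 0.9 on the public set may indicate overfitting."
        if flagged
        else ""
    )
    # Returns updates for (high_corr_warning, high_corr_checkbox)
    return gr.update(value=message, visible=flagged), gr.update(visible=flagged)
```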
constants.py CHANGED
@@ -28,6 +28,13 @@ ASSAY_EMOJIS = {
     "Tm2": "🌡️",
     "Titer": "🧪",
 }
+ASSAY_HIGHER_IS_BETTER = {
+    "HIC": False,
+    "Tm2": True,
+    "Titer": True,
+    "PR_CHO": False,
+    "AC-SINS_pH7.4": False,
+}
 # Tabs with emojis
 ABOUT_TAB_NAME = "📖 About / Rules"
 FAQ_TAB_NAME = "❓ FAQs"
@@ -50,15 +57,8 @@ EXAMPLE_FILE_DICT = {
     "GDPa1_cross_validation": "data/example-predictions-cv.csv",
     "Heldout Test Set": "data/example-predictions-heldout.csv",
 }
-ANTIBODY_NAMES_DICT = {
-    "GDPa1": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1"])["antibody_name"].tolist(),
-    "GDPa1_cross_validation": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1_cross_validation"])[
-        "antibody_name"
-    ].tolist(),
-    "Heldout Test Set": pd.read_csv(EXAMPLE_FILE_DICT["Heldout Test Set"])[
-        "antibody_name"
-    ].tolist(),
-}
+# GDPa1 dataset
+GDPa1_path = "hf://datasets/ginkgo-datapoints/GDPa1/GDPa1_v1.2_20250814.csv"
 
 # Huggingface API
 TOKEN = os.environ.get("HF_TOKEN")
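The new `GDPa1_path` uses the `hf://` scheme, which pandas resolves through fsspec; this assumes the `huggingface_hub` package is installed alongside pandas (and, for gated or private access, that a token such as the `HF_TOKEN` read above is configured):

```python
import pandas as pd

# hf:// URLs are handled by huggingface_hub's HfFileSystem via fsspec
GDPa1_path = "hf://datasets/ginkgo-datapoints/GDPa1/GDPa1_v1.2_20250814.csv"
df_gdpa1 = pd.read_csv(GDPa1_path)
print(df_gdpa1.shape)
```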
evaluation.py ADDED
@@ -0,0 +1,152 @@
+from collections import defaultdict
+from scipy.stats import spearmanr
+import pandas as pd
+import numpy as np
+
+from constants import ASSAY_LIST, ASSAY_HIGHER_IS_BETTER
+
+
+FOLD_COL = "hierarchical_cluster_IgG_isotype_stratified_fold"
+
+
+def recall_at_k(y_true: np.ndarray, y_pred: np.ndarray, frac: float = 0.1) -> float:
+    """Calculate recall (TP)/(TP+FN) for top fraction of true values.
+
+    A recall of 1 would mean that the top fraction of true values are also the top fraction of predicted values.
+    There is no penalty for ranking the top k differently.
+
+    Args:
+        y_true (np.ndarray): true values with shape (num_data,)
+        y_pred (np.ndarray): predicted values with shape (num_data,)
+        frac (float, optional): fraction of data points to consider as the top. Defaults to 0.1.
+
+    Returns:
+        float: recall at top k of data
+    """
+    top_k = int(len(y_true) * frac)
+    y_true, y_pred = np.array(y_true).flatten(), np.array(y_pred).flatten()
+    true_top_k = np.argsort(y_true)[-1 * top_k :]
+    predicted_top_k = np.argsort(y_pred)[-1 * top_k :]
+
+    return (
+        len(
+            set(list(true_top_k.flatten())).intersection(
+                set(list(predicted_top_k.flatten()))
+            )
+        )
+        / top_k
+    )
+
+
+def get_metrics(
+    predictions_series: pd.Series, target_series: pd.Series, assay_col: str
+) -> dict[str, float]:
+    results_dict = {
+        "spearman": spearmanr(
+            predictions_series, target_series, nan_policy="omit"
+        ).correlation
+    }
+    # Top 10% recall
+    y_true = target_series.values
+    y_pred = predictions_series.values
+    if not ASSAY_HIGHER_IS_BETTER[assay_col]:
+        y_true = -1 * y_true
+        y_pred = -1 * y_pred
+    results_dict["top_10_recall"] = recall_at_k(y_true=y_true, y_pred=y_pred, frac=0.1)
+    return results_dict
+
+
+def get_metrics_cross_validation(
+    predictions_series: pd.Series,
+    target_series: pd.Series,
+    folds_series: pd.Series,
+    assay_col: str,
+) -> dict[str, float]:
+    # Run evaluate in a cross-validation loop
+    results_dict = defaultdict(list)
+    if folds_series.nunique() != 5:
+        raise ValueError(f"Expected 5 folds, got {folds_series.nunique()}")
+    for fold in folds_series.unique():
+        predictions_series_fold = predictions_series[folds_series == fold]
+        target_series_fold = target_series[folds_series == fold]
+        results = get_metrics(predictions_series_fold, target_series_fold, assay_col)
+        # Update the results_dict with the results for this fold
+        for key, value in results.items():
+            results_dict[key].append(value)
+    # Calculate the mean of the results for each key (could also add std dev later)
+    for key, values in results_dict.items():
+        results_dict[key] = np.mean(values)
+    return results_dict
+
+
+def _get_result_for_assay(df_merged, assay_col, dataset_name):
+    """
+    Return a dictionary with the results for a single assay.
+    """
+    if dataset_name == "GDPa1_cross_validation":
+        results = get_metrics_cross_validation(
+            df_merged[assay_col + "_pred"],
+            df_merged[assay_col + "_true"],
+            df_merged[FOLD_COL],
+            assay_col,
+        )
+    elif dataset_name == "GDPa1":
+        results = get_metrics(
+            df_merged[assay_col + "_pred"], df_merged[assay_col + "_true"], assay_col
+        )
+    elif dataset_name == "Heldout Test Set":
+        # Just record these as NaNs for now - they'll appear on the leaderboard and we can handle them on their own
+        results = {"spearman": np.nan, "top_10_recall": np.nan}
+    results["assay"] = assay_col
+    return results
+
+
+def _get_error_result(assay_col, dataset_name, error):
+    """
+    Return a dictionary with the error message instead of metrics.
+    Used when _get_result_for_assay fails.
+    """
+    print(f"Error evaluating {assay_col}: {error}")
+    # Add a failed result record with error information
+    error_result = {
+        "dataset": dataset_name,
+        "assay": assay_col,
+    }
+
+    error_result.update({"spearman": error, "top_10_recall": error})
+    return error_result
+
+
+def evaluate(predictions_df, target_df, dataset_name="GDPa1"):
+    """
+    Evaluates a single model, where the predictions dataframe has columns named by property.
+    eg. my_model.csv has columns antibody_name, HIC, Tm2
+    Lood: Copied from Github repo, which I should move over here
+    """
+    properties_in_preds = [
+        col for col in predictions_df.columns if col in ASSAY_LIST
+    ]
+    df_merged = pd.merge(
+        target_df[["antibody_name", FOLD_COL] + ASSAY_LIST],
+        predictions_df[["antibody_name"] + properties_in_preds],
+        on="antibody_name",
+        how="left",
+        suffixes=("_true", "_pred"),
+    )
+    results_list = []
+    # Process each property one by one for better error handling
+    for assay_col in properties_in_preds:
+        try:
+            results = _get_result_for_assay(
+                df_merged, assay_col, dataset_name
+            )
+            results_list.append(results)
+
+        except Exception as e:
+            error_result = _get_error_result(
+                assay_col, dataset_name, e
+            )
+            results_list.append(error_result)
+
+    results_df = pd.DataFrame(results_list)
+    return results_df
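To make the `recall_at_k` semantics concrete, a self-contained check (illustrative values, run from the Space's repo root so `evaluation` is importable): with ten points and `frac=0.1` only the single highest true value counts, and there is no credit for near misses. `evaluate` itself needs the full GDPa1 target frame (all `ASSAY_LIST` columns plus the fold column), so it is easiest to exercise against the real dataset.

```python
import numpy as np
from evaluation import recall_at_k

y_true = np.arange(10)                              # true values 0..9
y_pred = np.array([0, 1, 2, 3, 4, 5, 6, 7, 9, 8])   # swaps the top two

# top 10% of 10 points = 1 point: the true best (index 9) is only
# ranked second by y_pred, so recall is 0.0
print(recall_at_k(y_true, y_pred, frac=0.1))  # 0.0

# top 20% = 2 points: {8, 9} are the top two in both rankings, so 1.0
print(recall_at_k(y_true, y_pred, frac=0.2))  # 1.0
```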
submit.py CHANGED
@@ -98,6 +98,8 @@ def make_submission(
     if path_obj.suffix.lower() != ".csv":
         raise gr.Error("File must be a CSV file. Please upload a .csv file.")
 
+
+
     upload_submission(
         file_path=path_obj,
         user_state=user_state,
validation.py CHANGED
@@ -7,8 +7,9 @@ from constants import (
     ASSAY_LIST,
     CV_COLUMN,
     EXAMPLE_FILE_DICT,
-    ANTIBODY_NAMES_DICT,
+    GDPa1_path,
 )
+from evaluation import evaluate
 
 
 def validate_username(username: str) -> bool:
@@ -137,6 +138,7 @@
         raise gr.Error(
             f"❌ Fold assignments don't match canonical CV folds: {'; '.join(examples)}"
         )
+
 
 
 def validate_full_dataset_submission(df: pd.DataFrame) -> None:
@@ -202,9 +204,11 @@ def validate_dataframe(df: pd.DataFrame, submission_type: str = "GDPa1") -> None
         raise gr.Error(
             f"❌ CSV should have only one row per antibody. Found {n_duplicates} duplicates."
         )
+
+    example_df = pd.read_csv(EXAMPLE_FILE_DICT[submission_type])
     # All antibody names should be recognizable
     unrecognized_antibodies = set(df["antibody_name"]) - set(
-        ANTIBODY_NAMES_DICT[submission_type]
+        example_df["antibody_name"].tolist()
     )
     if unrecognized_antibodies:
         raise gr.Error(
@@ -212,7 +216,8 @@ def validate_dataframe(df: pd.DataFrame, submission_type: str = "GDPa1") -> None
         )
 
     # All antibody names should be present
-    missing_antibodies = set(ANTIBODY_NAMES_DICT[submission_type]) - set(
+    # Note(Lood): Technically we could check that the antibodies are present just for the property that needs to be predicted
+    missing_antibodies = set(example_df["antibody_name"].tolist()) - set(
        df["antibody_name"]
    )
    if missing_antibodies:
@@ -224,6 +229,14 @@ def validate_dataframe(df: pd.DataFrame, submission_type: str = "GDPa1") -> None
         validate_cv_submission(df, submission_type)
     else:  # full_dataset
         validate_full_dataset_submission(df)
+
+    # Check Spearman correlations on public set
+    df_gdpa1 = pd.read_csv(GDPa1_path)
+    if submission_type in ["GDPa1", "GDPa1_cross_validation"]:
+        results_df = evaluate(predictions_df=df, target_df=df_gdpa1, dataset_name=submission_type)
+        # Check that the Spearman correlations are not too high
+        if results_df["spearman"].max() > 0.9:
+            raise gr.Error(f"❌ Your submission shows abnormally high correlations (>0.9) on the public set. Please check that you're not overfitting on the public set and are using cross-validation if training a new model.\nIf you think this is a mistake, please contact [email protected].", duration=30)
 
 
 def validate_csv_file(file_content: str, submission_type: str = "GDPa1") -> None: