Commit 2dafeb1
Parent(s): 7ae8833

Minor text edits and reformatting

Files changed:
- about.py +16 -10
- app.py +40 -27
- constants.py +22 -4
- data/example-predictions-heldout.csv +1 -1
- submit.py +4 -2
- utils.py +15 -9
about.py CHANGED

@@ -1,4 +1,10 @@
-from constants import …
+from constants import (
+    ABOUT_TAB_NAME,
+    ASSAY_LIST,
+    SUBMIT_TAB_NAME,
+    TERMS_URL,
+    FAQ_TAB_NAME,
+)
 
 ABOUT_INTRO = f"""
 ## About this challenge
@@ -7,15 +13,15 @@ ABOUT_INTRO = f"""
 
 #### What is antibody developability and why is it important?
 
-Antibodies have to be manufacturable, stable in high concentrations, and have low off-target effects.
+Antibodies have to be manufacturable, stable in high concentrations, and have low off-target effects.
 Properties such as these can often hinder the progression of an antibody to the clinic, and are collectively referred to as 'developability'.
 Here we invite the community to submit and develop better predictors, which will be tested out on a heldout private set to assess model generalization.
 
 #### 🏆 Prizes
 
-For each of the 5 properties in the competition, there is a prize for the model with the highest performance for that property on the private test set.
+For each of the 5 properties in the competition, there is a prize for the model with the highest performance for that property on the private test set.
 There is also an 'open-source' prize for the best model trained on the GDPa1 dataset (reporting cross-validation results) and assessed on the private test set where authors provide all training code and data.
-For each of these 6 prizes, participants have the choice between **$10k in data generation credits** with [Ginkgo Datapoints](https://datapoints.ginkgo.bio/) or a **cash prize** with a value of $2000.
+For each of these 6 prizes, participants have the choice between **$10k in data generation credits** with [Ginkgo Datapoints](https://datapoints.ginkgo.bio/) or a **cash prize** with a value of $2000.
 
 See the "{FAQ_TAB_NAME}" tab above (you are currently on the "{ABOUT_TAB_NAME}" tab) or the [competition terms]({TERMS_URL}) for more details.
 """
@@ -85,7 +91,7 @@ FAQS = {
     ),
     "How are winners determined?": (
         'There will be 6 prizes (one for each of the assay properties plus an "open-source" prize). '
-        …
+        "For the property-specific prizes, winners will be determined by the submission with the highest Spearman rank correlation coefficient on the private holdout set. "
         'For the "open-source" prize, this will be determined by the highest average Spearman across all properties. '
        "We reserve the right to award the open-source prize to a predictor with competitive results for a subset of properties (e.g. a top polyreactivity model)."
     ),
@@ -94,8 +100,8 @@ FAQS = {
     ),
     "What do I need to submit?": (
         'There is a tab on the Hugging Face competition page to upload predictions for datasets - for each dataset participants need to submit a CSV containing a column for each property they would like to predict (e.g. called "HIC"), '
-        …
-        …
+        "and a row with the sequence matching the sequence in the input file. These predictions are then evaluated in the backend using the Spearman rank correlation between predictions and experimental values, and these metrics are then added to the leaderboard. "
+        "Predictions remain private and are not seen by other contestants."
     ),
     "Can I submit predictions for only one property?": (
         "Yes. You do not need to predict all 5 properties to participate. Each property has its own leaderboard and prize, so you may submit models for a subset of the assays if you wish."
@@ -118,7 +124,7 @@ FAQS = {
 SUBMIT_INTRUCTIONS = f"""
 # Antibody Developability Submission
 Upload a CSV to get a score!
-List of valid property names: `{', '.join(ASSAY_LIST)}`.
+List of valid property names: `{', '.join(ASSAY_LIST)}`.
 
 You do **not** need to predict all 5 properties — each property has its own leaderboard and prize.
 
@@ -126,11 +132,11 @@ You do **not** need to predict all 5 properties — each property has its own le
 1. **Submit your predictions** as a CSV with `antibody_name` + one column per property you are predicting (e.g. `"antibody_name,Titer,PR_CHO"` if your model predicts Titer and Polyreactivity).
 2. **Final test submission**: Download test sequences from the example files below and upload predictions.
 
-The validation set results should appear on the leaderboard within a minute. The **private test set results will not appear on the leaderboards**, and will be used to determine the winners at the close of the competition.
+The validation set results should appear on the leaderboard within a minute. The **private test set results will not appear on the leaderboards**, and will be used to determine the winners at the close of the competition.
 We may release private test set results at intermediate points during the competition.
 
 ## Cross-validation
 
-For the cross-validation metrics (if training only on the GDPa1 dataset), use the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column to split the dataset into folds and make predictions for each of the folds.
+For the cross-validation metrics (if training only on the GDPa1 dataset), use the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column to split the dataset into folds and make predictions for each of the folds.
 Submit a CSV file in the same format but also containing the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column.
 """
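The FAQ text added above describes the scoring pipeline: predictions are joined to experimental values and scored per property by Spearman rank correlation. Below is a minimal sketch of that evaluation, assuming hypothetical file names and a ground-truth CSV keyed on `antibody_name`; the actual backend code is not part of this commit.

```python
import pandas as pd
from scipy.stats import spearmanr

# Hypothetical files; only antibody_name plus the properties you predict are required.
preds = pd.read_csv("my-predictions.csv")        # e.g. antibody_name,Titer,PR_CHO
truth = pd.read_csv("private-ground-truth.csv")  # held privately by the organizers

# Join predictions to experimental values on the shared antibody_name key,
# then compute one Spearman rank correlation per submitted property.
merged = preds.merge(truth, on="antibody_name", suffixes=("_pred", "_true"))
for prop in ["Titer", "PR_CHO"]:
    rho, _ = spearmanr(merged[f"{prop}_pred"], merged[f"{prop}_true"])
    print(f"{prop}: Spearman = {rho:.3f}")
```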
app.py CHANGED

@@ -1,12 +1,12 @@
 import pandas as pd
 
 import gradio as gr
-from gradio.themes.utils import …
+from gradio.themes.utils import sizes
 from gradio_leaderboard import Leaderboard
 
 from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
 from constants import (
-    ASSAY_RENAME,  # …
+    ASSAY_RENAME,  # noqa: F401
     EXAMPLE_FILE_DICT,
     LEADERBOARD_DISPLAY_COLUMNS,
     ABOUT_TAB_NAME,
@@ -19,6 +19,7 @@ from constants import (
 from submit import make_submission
 from utils import fetch_hf_results, show_output_box
 
+
 def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
     df = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
     if assay is not None:
@@ -29,8 +30,10 @@ def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None)
     # Note: We can also just say the following as a text box at the bottom of the leaderboard: "Note: Results for the Heldout Test Set are only evaluated at competition close"
     # Convert spearman column to string to avoid dtype incompatibility when assigning text
     df["spearman"] = df["spearman"].astype(str)
-    df.loc[
-        …
+    df.loc[
+        (df["dataset"] == "Heldout Test Set") & (df["spearman"] == "nan"), "spearman"
+    ] = "N/A, evaluated at competition close"
+
     # Finally, rename columns for readability
     df = df.rename(columns=LEADERBOARD_COLUMNS_RENAME)
     return df
@@ -46,8 +49,10 @@ def get_leaderboard_object(assay: str | None = None):
     lb = Leaderboard(
         value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
         datatype=["str", "str", "str", "number"],
-        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(
-            …
+        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(
+            ["model", "property", "spearman", "dataset"]
+        ),
+        search_columns=["Model Name"],
         filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns),
         every=15,
         render=True,
@@ -62,32 +67,30 @@ current_dataframe = fetch_hf_results()
 with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
     timer = gr.Timer(3)  # Run every 3 seconds when page is focused
     data_version = gr.State(value=0)  # Track data changes
-
+
     def update_current_dataframe():
         global current_dataframe
         new_dataframe = fetch_hf_results()
-
+
         # Check if data has actually changed
         if not current_dataframe.equals(new_dataframe):
             current_dataframe = new_dataframe
             return data_version.value + 1  # Increment version to trigger updates
         return data_version.value
-
+
     timer.tick(fn=update_current_dataframe, outputs=data_version)
-
+
     ## Header
-
+
     with gr.Row():
         with gr.Column(scale=6):  # bigger text area
             gr.Markdown(
                 f"""
 ## Welcome to the Ginkgo Antibody Developability Benchmark!
 
-**Beta version, not publicly launched yet**
-
 Participants can submit their model to the leaderboards by simply uploading a CSV file (see the "✉️ Submit" tab).
-
-You can **predict any or all of the 5 properties**, and
+
+You can **predict any or all of the 5 properties**, and you can filter the main leaderboard by property.
 See more details in the "{ABOUT_TAB_NAME}" tab.
 """
             )
@@ -96,13 +99,18 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
                 value="./assets/competition_logo.jpg",
                 show_label=False,
                 show_download_button=False,
-                width="25vw",
+                width="25vw",  # Take up the width of the column (2/8 = 1/4)
             )
-
+
     with gr.Tabs(elem_classes="tab-buttons"):
         with gr.TabItem(ABOUT_TAB_NAME, elem_id="abdev-benchmark-tab-table"):
             gr.Markdown(ABOUT_INTRO)
-            gr.Image(…
+            gr.Image(
+                value="./assets/prediction_explainer.png",
+                show_label=False,
+                show_download_button=False,
+                width="50vw",
+            )
             gr.Markdown(ABOUT_TEXT)
 
         # Procedurally make these 5 tabs
@@ -113,26 +121,31 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
         #     ) as tab_item:
        #         gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
         #         lb = get_leaderboard_object(assay=assay)
-
+
         #         def refresh_leaderboard(assay=assay):
         #             return format_leaderboard_table(df_results=current_dataframe, assay=assay)
-
+
         #         # Refresh when data version changes
         #         data_version.change(fn=refresh_leaderboard, outputs=lb)
 
         # Note(Lood): Trying out just one leaderboard. We could also have a dropdown here that shows different leaderboards for each property, but that's just the same as the filters
-        with gr.TabItem(…
+        with gr.TabItem(
+            "🏆 Leaderboard", elem_id="abdev-benchmark-tab-table"
+        ) as leaderboard_tab:
             gr.Markdown(
-                "…
+                """
+                # Overall Leaderboard (filter below by property)
+                Each property has its own prize, and participants can submit models for any combination of properties.
+                """
             )
             lb = get_leaderboard_object()
-
+
             def refresh_overall_leaderboard():
                 return format_leaderboard_table(df_results=current_dataframe)
-
+
             # Refresh when data version changes
             data_version.change(fn=refresh_overall_leaderboard, outputs=lb)
-
+
             # At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
             # gr.Markdown(
             #     "_ℹ️ Results for the private test set will not be shown here and will be used for final judging at the close of the competition._"
@@ -245,7 +258,7 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
             question = f"{i+1}. {question}"
             with gr.Accordion(question, open=False):
                 gr.Markdown(f"*{answer}*")  # Italics for answers
-
+
     # Footnote
     gr.Markdown(
         f"""
@@ -258,4 +271,4 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch(ssr_mode=False)
+    demo.launch(ssr_mode=False, share=True)
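The `gr.Timer` / `gr.State` wiring above implements a poll-and-refresh loop: the timer ticks every 3 seconds, the state version is bumped only when freshly fetched results differ, and components wired to `data_version.change` re-render. Below is a self-contained sketch of the same pattern with illustrative names and a stand-in for `fetch_hf_results`; it assumes a Gradio version that provides `gr.Timer`.

```python
import random

import gradio as gr

def fetch_data() -> str:
    return random.choice(["A", "B"])  # stand-in for fetch_hf_results()

latest = {"data": fetch_data()}

def poll(version: int) -> int:
    new = fetch_data()
    if new != latest["data"]:
        latest["data"] = new
        return version + 1  # bump the version so .change() listeners fire
    return version          # unchanged: no downstream refresh

with gr.Blocks() as demo:
    version = gr.State(value=0)
    timer = gr.Timer(3)  # seconds between ticks while the page is focused
    box = gr.Textbox(label="Current data")
    timer.tick(fn=poll, inputs=version, outputs=version)
    version.change(fn=lambda: latest["data"], outputs=box)

if __name__ == "__main__":
    demo.launch()
```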
constants.py CHANGED

@@ -55,7 +55,9 @@ ANTIBODY_NAMES_DICT = {
     "GDPa1_cross_validation": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1_cross_validation"])[
         "antibody_name"
     ].tolist(),
-    "Heldout Test Set": pd.read_csv(EXAMPLE_FILE_DICT["Heldout Test Set"])[…
+    "Heldout Test Set": pd.read_csv(EXAMPLE_FILE_DICT["Heldout Test Set"])[
+        "antibody_name"
+    ].tolist(),
 }
 
 # Huggingface API
@@ -69,8 +71,22 @@ SUBMISSIONS_REPO = f"{ORGANIZATION}/abdev-bench-submissions"
 RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results"
 
 # Leaderboard dataframes
-LEADERBOARD_RESULTS_COLUMNS = […
-…
+LEADERBOARD_RESULTS_COLUMNS = [
+    "model",
+    "assay",
+    "spearman",
+    "dataset",
+    "user",
+    "submission_time",
+]  # The columns expected from the results dataset
+LEADERBOARD_DISPLAY_COLUMNS = [
+    "model",
+    "property",
+    "spearman",
+    "dataset",
+    "user",
+    "submission_time",
+]  # After changing assay to property (pretty formatting)
 LEADERBOARD_COLUMNS_RENAME = {
     "spearman": "Spearman Correlation",
     "dataset": "Dataset",
@@ -79,5 +95,7 @@ LEADERBOARD_COLUMNS_RENAME = {
     "model": "Model Name",
     "property": "Property",
 }
+
+
 def LEADERBOARD_COLUMNS_RENAME_LIST(columns: list[str]) -> list[str]:
-    return list(map(lambda x: LEADERBOARD_COLUMNS_RENAME.get(x,x), columns))
+    return list(map(lambda x: LEADERBOARD_COLUMNS_RENAME.get(x, x), columns))
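The helper at the end maps internal column names to their display names, passing unrecognized names through unchanged via `dict.get(x, x)`. A small self-contained sketch of its behavior (the rename dict is abridged here; the real one may carry more entries):

```python
# Abridged copy of the mapping from constants.py.
LEADERBOARD_COLUMNS_RENAME = {
    "spearman": "Spearman Correlation",
    "dataset": "Dataset",
    "model": "Model Name",
    "property": "Property",
}

def LEADERBOARD_COLUMNS_RENAME_LIST(columns: list[str]) -> list[str]:
    # Unknown names fall through unchanged thanks to dict.get(x, x)
    return list(map(lambda x: LEADERBOARD_COLUMNS_RENAME.get(x, x), columns))

print(LEADERBOARD_COLUMNS_RENAME_LIST(["model", "spearman", "not_a_column"]))
# ['Model Name', 'Spearman Correlation', 'not_a_column']
```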
data/example-predictions-heldout.csv CHANGED

@@ -78,4 +78,4 @@ P907-A14-unary-estuary-9ae8d,EVQLVESGGGLVQPGGSLRLSCAASGFTFSRYWMSWVRQAPGKGLEWVANI
 P907-A14-undirected-hull-8daff,QMQLVQSGAEVRKPGASVKVSCKASGYTFTGHYIHWVRQAPGRGPEWMGWINPNSGGTNSSQSFQGRVTMTRDTSISTAYMELSRLTSDDTAVYSCARARYGDYYYFDSWGQGTLVTVSS,DIQMTQSPSSLSASVGDRVTITCRASQDISSYLAWYQQKPEKAPKSLIYAASSLQGGVPSRFSGSGSGTHFTLTISSLQPEDFATYYCQQYYSYPVTFGPGTKVDIK,QMQLVQS-GAEVRKPGASVKVSCKASG-YTFTG-----HYIHWVRQAPGRGPEWMGWINPN---SGGTNSSQSFQGRVTMTRDTSISTAYMELSRLTSDDTAVYSCARARYGD-------------------YYYFDSWGQGTLVTVSS,DIQMTQSPSSLSASVGDRVTITCRAS--QDIS------SYLAWYQQKPEKAPKSLIYA--------ASSLQGGVPSRFSGSGSG--THFTLTISSLQPEDFATYYCQQYYS-----------------------YPVTFGPGTKVDIK-,IgG1,Kappa
 P907-A14-vain-bucket-0f231,QVQLQQWGAGLLKPSETLSLTCAVYNGSSSAHYWSWVRQPPGKGLEWIGEISHGGSTTYNPSLKGRVSISVDTPKNQFSLNLSSVTAADTAVYYCATRAIHFRNRNFYSFYVEVWGKGTTVTVSS,EIVLTQSPGTLSLSPGERATLSCRASQSVSSSKLVWYQQRPGQAPRPLIYGASSRATGIPDRFSGSGSETDFTLTISWLEPEDFAVYYCHQYGSSPRTFGQGTKVEIK,QVQLQQW-GAGLLKPSETLSLTCAVYN-GSSSA-----HYWSWVRQPPGKGLEWIGEISH----GGSTTYNPSLKGRVSISVDTPKNQFSLNLSSVTAADTAVYYCATRAIHFRNR-------------NFYSFYVEVWGKGTTVTVSS,EIVLTQSPGTLSLSPGERATLSCRAS--QSVSS-----SKLVWYQQRPGQAPRPLIYG--------ASSRATGIPDRFSGSGSE--TDFTLTISWLEPEDFAVYYCHQYGS-----------------------SPRTFGQGTKVEIK-,IgG1,Kappa
 P907-A14-wintry-couple-24188,QVQLQQWGAGLLKPSETLSVTCAVYGGSFIGSSWIWIRQPPEKGLEWIGEINHGGSTTYNPSLKSRVTISLDMSKNQFSLNLTSVTAADTAVYYCATDRGSLAAVDWGQGTLVTVSS,DIQMTQSPSSLSASVGDRVTITCRASQAISSYLAWYQQKPGKVPKLLIYAASTLQSGVASRFTGSGSGTDFTLTISSLQPEDVATYYCQKYNSAPRTFGQGTRVEIK,QVQLQQW-GAGLLKPSETLSVTCAVYG-GSFIG-----SSWIWIRQPPEKGLEWIGEINH----GGSTTYNPSLKSRVTISLDMSKNQFSLNLTSVTAADTAVYYCATDRGS---------------------LAAVDWGQGTLVTVSS,DIQMTQSPSSLSASVGDRVTITCRAS--QAIS------SYLAWYQQKPGKVPKLLIYA--------ASTLQSGVASRFTGSGSG--TDFTLTISSLQPEDVATYYCQKYNS-----------------------APRTFGQGTRVEIK-,IgG1,Kappa
-P907-A14-witty-fugue-86932,EVQLVESGGGLVQPGRSLRLSCTASGFTFGDYAMNWVRQAPGKGLEWLGFIESKGYGGTTEYAASVKGRFIISRDDSKSIAYLQMNSLKTEDTAVYYCTPGDYWGQGTLVTVSS,SYELTQPPSVSVSPGQTARITCSGDALPKKYAYWYQQKSGQAPVQVIYEDSGRPSGIPERFSGSSSGTMATLTISGAQVEDEADYYCYSIDSSGNHRVFGGGTKLTVL,EVQLVES-GGGLVQPGRSLRLSCTASG-FTFGD-----YAMNWVRQAPGKGLEWLGFIESKG-YGGTTEYAASVKGRFIISRDDSKSIAYLQMNSLKTEDTAVYYCTPG---------------------------DYWGQGTLVTVSS,SYELTQP-PSVSVSPGQTARITCSGD---ALPK-----KYAYWYQQKSGQAPVQVIYE--------DSGRPSGIPERFSGSSSG--TMATLTISGAQVEDEADYYCYSIDSS---------------------GNHRVFGGGTKLTVL-,IgG1,Lambda
+P907-A14-witty-fugue-86932,EVQLVESGGGLVQPGRSLRLSCTASGFTFGDYAMNWVRQAPGKGLEWLGFIESKGYGGTTEYAASVKGRFIISRDDSKSIAYLQMNSLKTEDTAVYYCTPGDYWGQGTLVTVSS,SYELTQPPSVSVSPGQTARITCSGDALPKKYAYWYQQKSGQAPVQVIYEDSGRPSGIPERFSGSSSGTMATLTISGAQVEDEADYYCYSIDSSGNHRVFGGGTKLTVL,EVQLVES-GGGLVQPGRSLRLSCTASG-FTFGD-----YAMNWVRQAPGKGLEWLGFIESKG-YGGTTEYAASVKGRFIISRDDSKSIAYLQMNSLKTEDTAVYYCTPG---------------------------DYWGQGTLVTVSS,SYELTQP-PSVSVSPGQTARITCSGD---ALPK-----KYAYWYQQKSGQAPVQVIYE--------DSGRPSGIPERFSGSSSG--TMATLTISGAQVEDEADYYCYSIDSS---------------------GNHRVFGGGTKLTVL-,IgG1,Lambda
submit.py CHANGED

@@ -3,7 +3,6 @@ import tempfile
 from typing import BinaryIO
 import json
 
-from click import pass_obj
 import gradio as gr
 from datetime import datetime, timezone
 import uuid
@@ -58,6 +57,7 @@ def upload_submission(
     )
     Path(tmp_name).unlink()
 
+
 def make_submission(
     submitted_file: BinaryIO,
     user_state,
@@ -79,7 +79,9 @@ def make_submission(
         model_description = ""
         # raise gr.Error("Please provide a model description.")  # Not mandatory anymore
     if str(registration_code).strip().upper() != REGISTRATION_CODE:
-        raise gr.Error(…
+        raise gr.Error(
+            "Invalid registration code. Please register on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:[email protected]'>[email protected]</a>."
+        )
     if submitted_file is None:
         raise gr.Error("Please upload a CSV file before submitting.")
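The added error message follows the usual Gradio guard pattern: raising `gr.Error` inside a callback surfaces as a modal in the UI rather than crashing the app. A minimal sketch with a placeholder code value (the real `REGISTRATION_CODE` is defined elsewhere in the repo):

```python
import gradio as gr

REGISTRATION_CODE = "EXAMPLE123"  # placeholder; not the competition's real code

def check_code(registration_code: str) -> str:
    # Mirrors the normalization in make_submission: trim whitespace, uppercase.
    if str(registration_code).strip().upper() != REGISTRATION_CODE:
        raise gr.Error("Invalid registration code.")  # shown as a modal in the UI
    return "Code accepted."

with gr.Blocks() as demo:
    code = gr.Textbox(label="Registration code")
    status = gr.Textbox(label="Status")
    code.submit(check_code, inputs=code, outputs=status)

if __name__ == "__main__":
    demo.launch()
```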
utils.py CHANGED

@@ -5,12 +5,13 @@ import hashlib
 from typing import Iterable, Union
 from constants import RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS
 
-pd.set_option(…
+pd.set_option("display.max_columns", None)
 
 
 def show_output_box(message):
     return gr.update(value=message, visible=True)
 
+
 def anonymize_user(username: str) -> str:
     # Anonymize using a hash of the username
     return hashlib.sha256(username.encode()).hexdigest()[:8]
@@ -20,16 +21,21 @@ def fetch_hf_results():
     # For debugging
     # # Print current time in EST
     # EST = timezone(timedelta(hours=-4))
-    # print(f"tmp: Fetching results from HF at {datetime.now(EST)}")
+    # print(f"tmp: Fetching results from HF at {datetime.now(EST)}")
     # Should cache by default if not using force_redownload
     df = load_dataset(
-        RESULTS_REPO,
+        RESULTS_REPO,
+        data_files="auto_submissions/metrics_all.csv",
     )["train"].to_pandas()
-    assert all(…
+    assert all(
+        col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS
+    ), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
     # Show latest submission only
-    df = df.sort_values("submission_time", ascending=False).drop_duplicates(…
+    df = df.sort_values("submission_time", ascending=False).drop_duplicates(
+        subset=["model", "assay", "user"], keep="first"
+    )
     df["property"] = df["assay"].map(ASSAY_RENAME)
-
+
     # Anonymize the user column at this point
     df.loc[df["anonymous"] != False, "user"] = "anon-" + df.loc[df["anonymous"] != False, "user"].apply(readable_hash)
 
@@ -66,14 +72,14 @@ def readable_hash(
     data: Union[str, bytes, Iterable[int]],
     *,
     salt: Union[str, bytes, None] = None,
-    words: tuple[list[str], list[str]] = (ADJECTIVES, ANIMALS+NOUNS),
+    words: tuple[list[str], list[str]] = (ADJECTIVES, ANIMALS + NOUNS),
     sep: str = "-",
     checksum_len: int = 2,  # 0 to disable; 2–3 is plenty
-    case: str = "lower"
+    case: str = "lower",  # "lower" | "title" | "upper"
 ) -> str:
     """
     Deterministically map input data to 'adjective-animal[-checksum]'. Generated using ChatGPT.
-
+
     Examples
     --------
     >>> readable_hash("hello world")
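The "latest submission only" block added to `fetch_hf_results` keeps one row per `(model, assay, user)` by sorting newest-first and dropping duplicates. A toy illustration of that pandas idiom:

```python
import pandas as pd

# Two submissions from the same model/assay/user; only the newest should survive.
df = pd.DataFrame({
    "model": ["m1", "m1"],
    "assay": ["HIC", "HIC"],
    "user": ["alice", "alice"],
    "spearman": [0.41, 0.47],
    "submission_time": ["2025-01-01T00:00:00", "2025-02-01T00:00:00"],
})

# Sort newest-first, then keep the first (i.e. most recent) row per key.
latest = df.sort_values("submission_time", ascending=False).drop_duplicates(
    subset=["model", "assay", "user"], keep="first"
)
print(latest)  # keeps only the 2025-02-01 row (spearman 0.47)
```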