loodvanniekerkginkgo committed on
Commit 2dafeb1 · 1 Parent(s): 7ae8833

Minor text edits and reformatting

Files changed (6)
  1. about.py +16 -10
  2. app.py +40 -27
  3. constants.py +22 -4
  4. data/example-predictions-heldout.csv +1 -1
  5. submit.py +4 -2
  6. utils.py +15 -9
about.py CHANGED
@@ -1,4 +1,10 @@
-from constants import ABOUT_TAB_NAME, ASSAY_LIST, SUBMIT_TAB_NAME, TERMS_URL, FAQ_TAB_NAME
+from constants import (
+    ABOUT_TAB_NAME,
+    ASSAY_LIST,
+    SUBMIT_TAB_NAME,
+    TERMS_URL,
+    FAQ_TAB_NAME,
+)
 
 ABOUT_INTRO = f"""
 ## About this challenge
@@ -7,15 +13,15 @@ ABOUT_INTRO = f"""
 
 #### What is antibody developability and why is it important?
 
-Antibodies have to be manufacturable, stable in high concentrations, and have low off-target effects.
+Antibodies have to be manufacturable, stable in high concentrations, and have low off-target effects.
 Properties such as these can often hinder the progression of an antibody to the clinic, and are collectively referred to as 'developability'.
 Here we invite the community to submit and develop better predictors, which will be tested out on a heldout private set to assess model generalization.
 
 #### 🏆 Prizes
 
-For each of the 5 properties in the competition, there is a prize for the model with the highest performance for that property on the private test set.
+For each of the 5 properties in the competition, there is a prize for the model with the highest performance for that property on the private test set.
 There is also an 'open-source' prize for the best model trained on the GDPa1 dataset (reporting cross-validation results) and assessed on the private test set where authors provide all training code and data.
-For each of these 6 prizes, participants have the choice between **$10k in data generation credits** with [Ginkgo Datapoints](https://datapoints.ginkgo.bio/) or a **cash prize** with a value of $2000.
+For each of these 6 prizes, participants have the choice between **$10k in data generation credits** with [Ginkgo Datapoints](https://datapoints.ginkgo.bio/) or a **cash prize** with a value of $2000.
 
 See the "{FAQ_TAB_NAME}" tab above (you are currently on the "{ABOUT_TAB_NAME}" tab) or the [competition terms]({TERMS_URL}) for more details.
 """
@@ -85,7 +91,7 @@ FAQS = {
     ),
     "How are winners determined?": (
         'There will be 6 prizes (one for each of the assay properties plus an "open-source" prize). '
-        'For the property-specific prizes, winners will be determined by the submission with the highest Spearman rank correlation coefficient on the private holdout set. '
+        "For the property-specific prizes, winners will be determined by the submission with the highest Spearman rank correlation coefficient on the private holdout set. "
         'For the "open-source" prize, this will be determined by the highest average Spearman across all properties. '
         "We reserve the right to award the open-source prize to a predictor with competitive results for a subset of properties (e.g. a top polyreactivity model)."
     ),
@@ -94,8 +100,8 @@ FAQS = {
     ),
     "What do I need to submit?": (
         'There is a tab on the Hugging Face competition page to upload predictions for datasets - for each dataset participants need to submit a CSV containing a column for each property they would like to predict (e.g. called "HIC"), '
-        'and a row with the sequence matching the sequence in the input file. These predictions are then evaluated in the backend using the Spearman rank correlation between predictions and experimental values, and these metrics are then added to the leaderboard. '
-        'Predictions remain private and are not seen by other contestants.'
+        "and a row with the sequence matching the sequence in the input file. These predictions are then evaluated in the backend using the Spearman rank correlation between predictions and experimental values, and these metrics are then added to the leaderboard. "
+        "Predictions remain private and are not seen by other contestants."
    ),
     "Can I submit predictions for only one property?": (
         "Yes. You do not need to predict all 5 properties to participate. Each property has its own leaderboard and prize, so you may submit models for a subset of the assays if you wish."
@@ -118,7 +124,7 @@ FAQS = {
 SUBMIT_INTRUCTIONS = f"""
 # Antibody Developability Submission
 Upload a CSV to get a score!
-List of valid property names: `{', '.join(ASSAY_LIST)}`.
+List of valid property names: `{', '.join(ASSAY_LIST)}`.
 
 You do **not** need to predict all 5 properties — each property has its own leaderboard and prize.
 
@@ -126,11 +132,11 @@ You do **not** need to predict all 5 properties — each property has its own le
 1. **Submit your predictions** as a CSV with `antibody_name` + one column per property you are predicting (e.g. `"antibody_name,Titer,PR_CHO"` if your model predicts Titer and Polyreactivity).
 2. **Final test submission**: Download test sequences from the example files below and upload predictions.
 
-The validation set results should appear on the leaderboard within a minute. The **private test set results will not appear on the leaderboards**, and will be used to determine the winners at the close of the competition.
+The validation set results should appear on the leaderboard within a minute. The **private test set results will not appear on the leaderboards**, and will be used to determine the winners at the close of the competition.
 We may release private test set results at intermediate points during the competition.
 
 ## Cross-validation
 
-For the cross-validation metrics (if training only on the GDPa1 dataset), use the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column to split the dataset into folds and make predictions for each of the folds.
+For the cross-validation metrics (if training only on the GDPa1 dataset), use the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column to split the dataset into folds and make predictions for each of the folds.
 Submit a CSV file in the same format but also containing the `"hierarchical_cluster_IgG_isotype_stratified_fold"` column.
 """
app.py CHANGED
@@ -1,12 +1,12 @@
 import pandas as pd
 
 import gradio as gr
-from gradio.themes.utils import colors, fonts, sizes
+from gradio.themes.utils import sizes
 from gradio_leaderboard import Leaderboard
 
 from about import ABOUT_INTRO, ABOUT_TEXT, FAQS, SUBMIT_INTRUCTIONS
 from constants import (
-    ASSAY_RENAME,  # keep this: used in df query
+    ASSAY_RENAME,  # noqa: F401
     EXAMPLE_FILE_DICT,
     LEADERBOARD_DISPLAY_COLUMNS,
     ABOUT_TAB_NAME,
@@ -19,6 +19,7 @@ from constants import (
 from submit import make_submission
 from utils import fetch_hf_results, show_output_box
 
+
 def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None):
     df = df_results.query("assay.isin(@ASSAY_RENAME.keys())").copy()
     if assay is not None:
@@ -29,8 +30,10 @@ def format_leaderboard_table(df_results: pd.DataFrame, assay: str | None = None)
     # Note: We can also just say the following as a text box at the bottom of the leaderboard: "Note: Results for the Heldout Test Set are only evaluated at competition close"
     # Convert spearman column to string to avoid dtype incompatibility when assigning text
     df["spearman"] = df["spearman"].astype(str)
-    df.loc[(df["dataset"] == "Heldout Test Set") & (df["spearman"] == "nan"), "spearman"] = "N/A, evaluated at competition close"
-
+    df.loc[
+        (df["dataset"] == "Heldout Test Set") & (df["spearman"] == "nan"), "spearman"
+    ] = "N/A, evaluated at competition close"
+
     # Finally, rename columns for readability
     df = df.rename(columns=LEADERBOARD_COLUMNS_RENAME)
     return df
@@ -46,8 +49,10 @@ def get_leaderboard_object(assay: str | None = None):
     lb = Leaderboard(
         value=format_leaderboard_table(df_results=current_dataframe, assay=assay),
         datatype=["str", "str", "str", "number"],
-        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(["model", "property", "spearman", "dataset"]),
-        search_columns=["Model Name"],
+        select_columns=LEADERBOARD_COLUMNS_RENAME_LIST(
+            ["model", "property", "spearman", "dataset"]
+        ),
+        search_columns=["Model Name"],
         filter_columns=LEADERBOARD_COLUMNS_RENAME_LIST(filter_columns),
         every=15,
         render=True,
@@ -62,32 +67,30 @@ current_dataframe = fetch_hf_results()
 with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
     timer = gr.Timer(3)  # Run every 3 seconds when page is focused
     data_version = gr.State(value=0)  # Track data changes
-
+
     def update_current_dataframe():
         global current_dataframe
         new_dataframe = fetch_hf_results()
-
+
         # Check if data has actually changed
         if not current_dataframe.equals(new_dataframe):
             current_dataframe = new_dataframe
             return data_version.value + 1  # Increment version to trigger updates
         return data_version.value
-
+
     timer.tick(fn=update_current_dataframe, outputs=data_version)
-
+
     ## Header
-
+
     with gr.Row():
         with gr.Column(scale=6):  # bigger text area
             gr.Markdown(
                 f"""
                 ## Welcome to the Ginkgo Antibody Developability Benchmark!
 
-                **Beta version, not publicly launched yet**
-
                 Participants can submit their model to the leaderboards by simply uploading a CSV file (see the "✉️ Submit" tab).
-
-                You can **predict any or all of the 5 properties**, and each property has its own leaderboard.
+
+                You can **predict any or all of the 5 properties**, and you can filter the main leaderboard by property.
                 See more details in the "{ABOUT_TAB_NAME}" tab.
                 """
             )
@@ -96,13 +99,18 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
                 value="./assets/competition_logo.jpg",
                 show_label=False,
                 show_download_button=False,
-                width="25vw",  # Take up the width of the column (2/8 = 1/4)
+                width="25vw",  # Take up the width of the column (2/8 = 1/4)
             )
-
+
     with gr.Tabs(elem_classes="tab-buttons"):
         with gr.TabItem(ABOUT_TAB_NAME, elem_id="abdev-benchmark-tab-table"):
             gr.Markdown(ABOUT_INTRO)
-            gr.Image(value="./assets/prediction_explainer.png", show_label=False, show_download_button=False, width="50vw")
+            gr.Image(
+                value="./assets/prediction_explainer.png",
+                show_label=False,
+                show_download_button=False,
+                width="50vw",
+            )
             gr.Markdown(ABOUT_TEXT)
 
             # Procedurally make these 5 tabs
@@ -113,26 +121,31 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
         # ) as tab_item:
         #     gr.Markdown(f"# {ASSAY_DESCRIPTION[assay]}")
         #     lb = get_leaderboard_object(assay=assay)
-
+
         #     def refresh_leaderboard(assay=assay):
         #         return format_leaderboard_table(df_results=current_dataframe, assay=assay)
-
+
         #     # Refresh when data version changes
         #     data_version.change(fn=refresh_leaderboard, outputs=lb)
 
         # Note(Lood): Trying out just one leaderboard. We could also have a dropdown here that shows different leaderboards for each property, but that's just the same as the filters
-        with gr.TabItem("🏆 Leaderboard", elem_id="abdev-benchmark-tab-table") as leaderboard_tab:
+        with gr.TabItem(
+            "🏆 Leaderboard", elem_id="abdev-benchmark-tab-table"
+        ) as leaderboard_tab:
             gr.Markdown(
-                "# Overall Leaderboard (filter below by property)"  # TODO add details about the 6 prizes here
+                """
+                # Overall Leaderboard (filter below by property)
+                Each property has its own prize, and participants can submit models for any combination of properties.
+                """
             )
             lb = get_leaderboard_object()
-
+
             def refresh_overall_leaderboard():
                 return format_leaderboard_table(df_results=current_dataframe)
-
+
             # Refresh when data version changes
             data_version.change(fn=refresh_overall_leaderboard, outputs=lb)
-
+
             # At the bottom of the leaderboard, we can keep as NaN and explain missing test set results
             # gr.Markdown(
             #     "_ℹ️ Results for the private test set will not be shown here and will be used for final judging at the close of the competition._"
@@ -245,7 +258,7 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
             question = f"{i+1}. {question}"
             with gr.Accordion(question, open=False):
                 gr.Markdown(f"*{answer}*")  # Italics for answers
-
+
     # Footnote
     gr.Markdown(
         f"""
@@ -258,4 +271,4 @@ with gr.Blocks(theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch(ssr_mode=False)
+    demo.launch(ssr_mode=False, share=True)
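The timer wiring above is worth a second look: a `gr.Timer` polls cheaply every few seconds, a `gr.State` holds a version counter, and the leaderboard re-renders only when the fetched data actually changed. Here is a self-contained sketch of the same pattern, with a hypothetical `fetch()` standing in for `fetch_hf_results()`:

```python
import time

import gradio as gr

cache = {"version": 0, "value": None}

def fetch():
    # Hypothetical data source; app.py calls fetch_hf_results() here.
    return time.strftime("%H:%M")  # changes once per minute

def poll():
    new = fetch()
    if new != cache["value"]:  # bump the version only when data changed
        cache["value"] = new
        cache["version"] += 1
    return cache["version"]

def render(version):
    return f"v{version}: {cache['value']}"

with gr.Blocks() as demo:
    version = gr.State(value=0)
    out = gr.Textbox(label="Data")
    gr.Timer(3).tick(fn=poll, outputs=version)  # cheap poll every 3 seconds
    version.change(fn=render, inputs=version, outputs=out)  # heavy work only on change

if __name__ == "__main__":
    demo.launch()
```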
constants.py CHANGED
@@ -55,7 +55,9 @@ ANTIBODY_NAMES_DICT = {
     "GDPa1_cross_validation": pd.read_csv(EXAMPLE_FILE_DICT["GDPa1_cross_validation"])[
         "antibody_name"
     ].tolist(),
-    "Heldout Test Set": pd.read_csv(EXAMPLE_FILE_DICT["Heldout Test Set"])["antibody_name"].tolist(),
+    "Heldout Test Set": pd.read_csv(EXAMPLE_FILE_DICT["Heldout Test Set"])[
+        "antibody_name"
+    ].tolist(),
 }
 
 # Huggingface API
@@ -69,8 +71,22 @@ SUBMISSIONS_REPO = f"{ORGANIZATION}/abdev-bench-submissions"
 RESULTS_REPO = f"{ORGANIZATION}/abdev-bench-results"
 
 # Leaderboard dataframes
-LEADERBOARD_RESULTS_COLUMNS = ["model", "assay", "spearman", "dataset", "user", "submission_time"]  # The columns expected from the results dataset
-LEADERBOARD_DISPLAY_COLUMNS = ["model", "property", "spearman", "dataset", "user", "submission_time"]  # After changing assay to property (pretty formatting)
+LEADERBOARD_RESULTS_COLUMNS = [
+    "model",
+    "assay",
+    "spearman",
+    "dataset",
+    "user",
+    "submission_time",
+]  # The columns expected from the results dataset
+LEADERBOARD_DISPLAY_COLUMNS = [
+    "model",
+    "property",
+    "spearman",
+    "dataset",
+    "user",
+    "submission_time",
+]  # After changing assay to property (pretty formatting)
 LEADERBOARD_COLUMNS_RENAME = {
     "spearman": "Spearman Correlation",
     "dataset": "Dataset",
@@ -79,5 +95,7 @@ LEADERBOARD_COLUMNS_RENAME = {
     "model": "Model Name",
     "property": "Property",
 }
+
+
 def LEADERBOARD_COLUMNS_RENAME_LIST(columns: list[str]) -> list[str]:
-    return list(map(lambda x: LEADERBOARD_COLUMNS_RENAME.get(x,x), columns))
+    return list(map(lambda x: LEADERBOARD_COLUMNS_RENAME.get(x, x), columns))
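For reference, the reformatted `LEADERBOARD_COLUMNS_RENAME_LIST` helper maps raw result-column keys through `LEADERBOARD_COLUMNS_RENAME` and passes unknown keys through unchanged:

```python
from constants import LEADERBOARD_COLUMNS_RENAME_LIST

# Known keys get display names; unrecognized keys are returned as-is.
print(LEADERBOARD_COLUMNS_RENAME_LIST(["model", "spearman", "extra_col"]))
# ['Model Name', 'Spearman Correlation', 'extra_col']
```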
data/example-predictions-heldout.csv CHANGED
@@ -78,4 +78,4 @@ P907-A14-unary-estuary-9ae8d,EVQLVESGGGLVQPGGSLRLSCAASGFTFSRYWMSWVRQAPGKGLEWVANI
 P907-A14-undirected-hull-8daff,QMQLVQSGAEVRKPGASVKVSCKASGYTFTGHYIHWVRQAPGRGPEWMGWINPNSGGTNSSQSFQGRVTMTRDTSISTAYMELSRLTSDDTAVYSCARARYGDYYYFDSWGQGTLVTVSS,DIQMTQSPSSLSASVGDRVTITCRASQDISSYLAWYQQKPEKAPKSLIYAASSLQGGVPSRFSGSGSGTHFTLTISSLQPEDFATYYCQQYYSYPVTFGPGTKVDIK,QMQLVQS-GAEVRKPGASVKVSCKASG-YTFTG-----HYIHWVRQAPGRGPEWMGWINPN---SGGTNSSQSFQGRVTMTRDTSISTAYMELSRLTSDDTAVYSCARARYGD-------------------YYYFDSWGQGTLVTVSS,DIQMTQSPSSLSASVGDRVTITCRAS--QDIS------SYLAWYQQKPEKAPKSLIYA--------ASSLQGGVPSRFSGSGSG--THFTLTISSLQPEDFATYYCQQYYS-----------------------YPVTFGPGTKVDIK-,IgG1,Kappa
 P907-A14-vain-bucket-0f231,QVQLQQWGAGLLKPSETLSLTCAVYNGSSSAHYWSWVRQPPGKGLEWIGEISHGGSTTYNPSLKGRVSISVDTPKNQFSLNLSSVTAADTAVYYCATRAIHFRNRNFYSFYVEVWGKGTTVTVSS,EIVLTQSPGTLSLSPGERATLSCRASQSVSSSKLVWYQQRPGQAPRPLIYGASSRATGIPDRFSGSGSETDFTLTISWLEPEDFAVYYCHQYGSSPRTFGQGTKVEIK,QVQLQQW-GAGLLKPSETLSLTCAVYN-GSSSA-----HYWSWVRQPPGKGLEWIGEISH----GGSTTYNPSLKGRVSISVDTPKNQFSLNLSSVTAADTAVYYCATRAIHFRNR-------------NFYSFYVEVWGKGTTVTVSS,EIVLTQSPGTLSLSPGERATLSCRAS--QSVSS-----SKLVWYQQRPGQAPRPLIYG--------ASSRATGIPDRFSGSGSE--TDFTLTISWLEPEDFAVYYCHQYGS-----------------------SPRTFGQGTKVEIK-,IgG1,Kappa
 P907-A14-wintry-couple-24188,QVQLQQWGAGLLKPSETLSVTCAVYGGSFIGSSWIWIRQPPEKGLEWIGEINHGGSTTYNPSLKSRVTISLDMSKNQFSLNLTSVTAADTAVYYCATDRGSLAAVDWGQGTLVTVSS,DIQMTQSPSSLSASVGDRVTITCRASQAISSYLAWYQQKPGKVPKLLIYAASTLQSGVASRFTGSGSGTDFTLTISSLQPEDVATYYCQKYNSAPRTFGQGTRVEIK,QVQLQQW-GAGLLKPSETLSVTCAVYG-GSFIG-----SSWIWIRQPPEKGLEWIGEINH----GGSTTYNPSLKSRVTISLDMSKNQFSLNLTSVTAADTAVYYCATDRGS---------------------LAAVDWGQGTLVTVSS,DIQMTQSPSSLSASVGDRVTITCRAS--QAIS------SYLAWYQQKPGKVPKLLIYA--------ASTLQSGVASRFTGSGSG--TDFTLTISSLQPEDVATYYCQKYNS-----------------------APRTFGQGTRVEIK-,IgG1,Kappa
-P907-A14-witty-fugue-86932,EVQLVESGGGLVQPGRSLRLSCTASGFTFGDYAMNWVRQAPGKGLEWLGFIESKGYGGTTEYAASVKGRFIISRDDSKSIAYLQMNSLKTEDTAVYYCTPGDYWGQGTLVTVSS,SYELTQPPSVSVSPGQTARITCSGDALPKKYAYWYQQKSGQAPVQVIYEDSGRPSGIPERFSGSSSGTMATLTISGAQVEDEADYYCYSIDSSGNHRVFGGGTKLTVL,EVQLVES-GGGLVQPGRSLRLSCTASG-FTFGD-----YAMNWVRQAPGKGLEWLGFIESKG-YGGTTEYAASVKGRFIISRDDSKSIAYLQMNSLKTEDTAVYYCTPG---------------------------DYWGQGTLVTVSS,SYELTQP-PSVSVSPGQTARITCSGD---ALPK-----KYAYWYQQKSGQAPVQVIYE--------DSGRPSGIPERFSGSSSG--TMATLTISGAQVEDEADYYCYSIDSS---------------------GNHRVFGGGTKLTVL-,IgG1,Lambda
+P907-A14-witty-fugue-86932,EVQLVESGGGLVQPGRSLRLSCTASGFTFGDYAMNWVRQAPGKGLEWLGFIESKGYGGTTEYAASVKGRFIISRDDSKSIAYLQMNSLKTEDTAVYYCTPGDYWGQGTLVTVSS,SYELTQPPSVSVSPGQTARITCSGDALPKKYAYWYQQKSGQAPVQVIYEDSGRPSGIPERFSGSSSGTMATLTISGAQVEDEADYYCYSIDSSGNHRVFGGGTKLTVL,EVQLVES-GGGLVQPGRSLRLSCTASG-FTFGD-----YAMNWVRQAPGKGLEWLGFIESKG-YGGTTEYAASVKGRFIISRDDSKSIAYLQMNSLKTEDTAVYYCTPG---------------------------DYWGQGTLVTVSS,SYELTQP-PSVSVSPGQTARITCSGD---ALPK-----KYAYWYQQKSGQAPVQVIYE--------DSGRPSGIPERFSGSSSG--TMATLTISGAQVEDEADYYCYSIDSS---------------------GNHRVFGGGTKLTVL-,IgG1,Lambda
submit.py CHANGED
@@ -3,7 +3,6 @@ import tempfile
 from typing import BinaryIO
 import json
 
-from click import pass_obj
 import gradio as gr
 from datetime import datetime, timezone
 import uuid
@@ -58,6 +57,7 @@ def upload_submission(
     )
     Path(tmp_name).unlink()
 
+
 def make_submission(
     submitted_file: BinaryIO,
     user_state,
@@ -79,7 +79,9 @@ def make_submission(
         model_description = ""
         # raise gr.Error("Please provide a model description.")  # Not mandatory anymore
     if str(registration_code).strip().upper() != REGISTRATION_CODE:
-        raise gr.Error("Invalid registration code. Please register on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:[email protected]'>[email protected]</a>.")
+        raise gr.Error(
+            "Invalid registration code. Please register on the <a href='https://datapoints.ginkgo.bio/ai-competitions/2025-abdev-competition'>Competition Registration page</a> or email <a href='mailto:[email protected]'>[email protected]</a>."
+        )
     if submitted_file is None:
         raise gr.Error("Please upload a CSV file before submitting.")
 
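The registration check above compares a normalized form of the user's input, so case differences and stray whitespace are tolerated. A tiny sketch of that normalization, using a made-up code (the real `REGISTRATION_CODE` is not part of this diff):

```python
REGISTRATION_CODE = "ABC123"  # hypothetical value; the real code is kept private

def code_ok(registration_code) -> bool:
    # Mirrors make_submission(): strip whitespace and uppercase before comparing.
    return str(registration_code).strip().upper() == REGISTRATION_CODE

assert code_ok(" abc123 ")
assert not code_ok("abc124")
```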
utils.py CHANGED
@@ -5,12 +5,13 @@ import hashlib
 from typing import Iterable, Union
 from constants import RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS
 
-pd.set_option('display.max_columns', None)
+pd.set_option("display.max_columns", None)
 
 
 def show_output_box(message):
     return gr.update(value=message, visible=True)
 
+
 def anonymize_user(username: str) -> str:
     # Anonymize using a hash of the username
     return hashlib.sha256(username.encode()).hexdigest()[:8]
@@ -20,16 +21,21 @@ def fetch_hf_results():
     # For debugging
     # # Print current time in EST
     # EST = timezone(timedelta(hours=-4))
-    # print(f"tmp: Fetching results from HF at {datetime.now(EST)}")
+    # print(f"tmp: Fetching results from HF at {datetime.now(EST)}")
     # Should cache by default if not using force_redownload
     df = load_dataset(
-        RESULTS_REPO, data_files="auto_submissions/metrics_all.csv",
+        RESULTS_REPO,
+        data_files="auto_submissions/metrics_all.csv",
     )["train"].to_pandas()
-    assert all(col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_COLUMNS) - set(df.columns)}"
+    assert all(
+        col in df.columns for col in LEADERBOARD_RESULTS_COLUMNS
+    ), f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in {df.columns}. Missing columns: {set(LEADERBOARD_RESULTS_COLUMNS) - set(df.columns)}"
     # Show latest submission only
-    df = df.sort_values("submission_time", ascending=False).drop_duplicates(subset=["model", "assay", "user"], keep="first")
+    df = df.sort_values("submission_time", ascending=False).drop_duplicates(
+        subset=["model", "assay", "user"], keep="first"
+    )
     df["property"] = df["assay"].map(ASSAY_RENAME)
-
+
     # Anonymize the user column at this point
     df.loc[df["anonymous"] != False, "user"] = "anon-" + df.loc[df["anonymous"] != False, "user"].apply(readable_hash)
 
@@ -66,14 +72,14 @@ def readable_hash(
     data: Union[str, bytes, Iterable[int]],
     *,
     salt: Union[str, bytes, None] = None,
-    words: tuple[list[str], list[str]] = (ADJECTIVES, ANIMALS+NOUNS),
+    words: tuple[list[str], list[str]] = (ADJECTIVES, ANIMALS + NOUNS),
     sep: str = "-",
     checksum_len: int = 2,  # 0 to disable; 2–3 is plenty
-    case: str = "lower"  # "lower" | "title" | "upper"
+    case: str = "lower",  # "lower" | "title" | "upper"
 ) -> str:
     """
     Deterministically map input data to 'adjective-animal[-checksum]'. Generated using ChatGPT.
-
+
     Examples
     --------
     >>> readable_hash("hello world")
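The reformatted `drop_duplicates` call in `fetch_hf_results()` keeps only the latest submission per (model, assay, user). A toy example of that dedup logic on made-up rows:

```python
import pandas as pd

# Fabricated example rows; m1/alice has two submissions, newest dated 2025-09-03.
df = pd.DataFrame(
    {
        "model": ["m1", "m1", "m2"],
        "assay": ["HIC", "HIC", "HIC"],
        "user": ["alice", "alice", "bob"],
        "spearman": [0.41, 0.55, 0.38],
        "submission_time": ["2025-09-01", "2025-09-03", "2025-09-02"],
    }
)
latest = df.sort_values("submission_time", ascending=False).drop_duplicates(
    subset=["model", "assay", "user"], keep="first"
)
print(latest[["model", "user", "spearman"]])
# m1/alice keeps the 2025-09-03 run (0.55); the earlier 0.41 row is dropped.
```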