jfaustin committed on
Commit
d6a8f4f
·
1 Parent(s): c08afad

wip: explain scores

Browse files
folding_studio_demo/app.py CHANGED
@@ -9,6 +9,7 @@ from gradio_molecule3d import Molecule3D
9
 
10
  from folding_studio_demo.correlate import (
11
  SCORE_COLUMNS,
 
12
  fake_predict_and_correlate,
13
  make_regression_plot,
14
  compute_correlation_data,
@@ -181,6 +182,7 @@ def create_correlation_tab():
181
  of binding strength.
182
  """)
183
  spr_data_with_scores = pd.read_csv("spr_af_scores_mapped.csv")
 
184
  prettified_columns = {
185
  "antibody_name": "Antibody Name",
186
  "KD (nM)": "KD (nM)",
@@ -213,14 +215,14 @@ def create_correlation_tab():
213
  with gr.Row():
214
  prediction_dataframe = gr.Dataframe(label="Predicted Structures Data")
215
  with gr.Row():
216
- with gr.Column():
217
  correlation_type = gr.Radio(
218
  choices=["Spearman", "Pearson", "R²"],
219
  value="Spearman",
220
  label="Correlation Type",
221
  interactive=True
222
  )
223
- with gr.Column():
224
  correlation_ranking_plot = gr.Plot(label="Correlation ranking")
225
  with gr.Row():
226
  with gr.Column():
@@ -232,6 +234,37 @@ def create_correlation_tab():
232
  # Add checkbox for log scale and update plot when either input changes
233
  with gr.Row():
234
  log_scale = gr.Checkbox(label="Display x-axis on logarithmic scale", value=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  with gr.Column():
236
  correlation_plot = gr.Plot(label="Correlation with binding affinity")
237
 
 
9
 
10
  from folding_studio_demo.correlate import (
11
  SCORE_COLUMNS,
12
+ SCORE_COLUMN_NAMES,
13
  fake_predict_and_correlate,
14
  make_regression_plot,
15
  compute_correlation_data,
 
182
  of binding strength.
183
  """)
184
  spr_data_with_scores = pd.read_csv("spr_af_scores_mapped.csv")
185
+ spr_data_with_scores = spr_data_with_scores.rename(columns=SCORE_COLUMN_NAMES)
186
  prettified_columns = {
187
  "antibody_name": "Antibody Name",
188
  "KD (nM)": "KD (nM)",
 
215
  with gr.Row():
216
  prediction_dataframe = gr.Dataframe(label="Predicted Structures Data")
217
  with gr.Row():
218
+ with gr.Row():
219
  correlation_type = gr.Radio(
220
  choices=["Spearman", "Pearson", "R²"],
221
  value="Spearman",
222
  label="Correlation Type",
223
  interactive=True
224
  )
225
+ with gr.Row():
226
  correlation_ranking_plot = gr.Plot(label="Correlation ranking")
227
  with gr.Row():
228
  with gr.Column():
 
234
  # Add checkbox for log scale and update plot when either input changes
235
  with gr.Row():
236
  log_scale = gr.Checkbox(label="Display x-axis on logarithmic scale", value=False)
237
+ with gr.Row():
238
def get_score_description(score: str) -> str:
    """Return the user-facing explanation for a score's display name.

    Args:
        score: Display name of a score column, e.g. ``"Boltz pTM Score"``.

    Returns:
        The one-sentence description registered for ``score``, or a generic
        fallback sentence when the name is not in the table.
    """
    descriptions = {
        "Boltz Confidence Score": "The Boltz confidence score provides an overall assessment of prediction quality (0-1, higher is better).",
        "Boltz pTM Score": "The Boltz predicted TM-score (pTM) assesses the overall fold accuracy of the predicted structure (0-1, higher is better).",
        "Boltz ipTM Score": "The Boltz interface pTM score (ipTM) specifically evaluates the accuracy of interface regions (0-1, higher is better).",
        "Boltz Complex pLDDT": "The Boltz Complex pLDDT measures confidence in local structure predictions across the entire complex (0-100, higher is better).",
        "Boltz Complex ipLDDT": "The Boltz Complex interface pLDDT (ipLDDT) focuses on confidence in interface region predictions (0-100, higher is better).",
        # pDE / ipDE are predicted *errors*: like the PAE entries below, a
        # smaller value means a better prediction, and they are not 0-1 bounded.
        "Boltz Complex pDE": "The Boltz Complex predicted distance error (pDE) estimates the error in predicted distances between residues (lower is better).",
        "Boltz Complex ipDE": "The Boltz Complex interface pDE (ipDE) estimates the error in predicted distances specifically at interfaces (lower is better).",
        "Monomer Interchain PAE": "The monomer interchain predicted aligned error (PAE) estimates position errors between chains in monomeric predictions (lower is better).",
        "Monomer Interface PAE": "The monomer interface PAE estimates position errors specifically at interfaces in monomeric predictions (lower is better).",
        "Monomer Overall PAE": "The monomer overall PAE estimates position errors across the entire structure in monomeric predictions (lower is better).",
        "Monomer Interface pLDDT": "The monomer interface pLDDT measures confidence in interface region predictions for monomeric models (0-100, higher is better).",
        "Monomer Average pLDDT": "The monomer average pLDDT provides the mean confidence across all residues in monomeric predictions (0-100, higher is better).",
        "Monomer pTM Score": "The monomer pTM score assesses overall fold accuracy in monomeric predictions (0-1, higher is better).",
        "Monomer Interface pTM": "The monomer interface pTM specifically evaluates accuracy of interface regions in monomeric predictions (0-1, higher is better).",
        "Multimer Interchain PAE": "The multimer interchain PAE estimates position errors between chains in multimeric predictions (lower is better).",
        "Multimer Interface PAE": "The multimer interface PAE estimates position errors specifically at interfaces in multimeric predictions (lower is better).",
        "Multimer Overall PAE": "The multimer overall PAE estimates position errors across the entire structure in multimeric predictions (lower is better).",
        "Multimer Interface pLDDT": "The multimer interface pLDDT measures confidence in interface region predictions for multimeric models (0-100, higher is better).",
        "Multimer Average pLDDT": "The multimer average pLDDT provides the mean confidence across all residues in multimeric predictions (0-100, higher is better).",
        "Multimer pTM Score": "The multimer pTM score assesses overall fold accuracy in multimeric predictions (0-1, higher is better).",
        "Multimer Interface pTM": "The multimer interface pTM specifically evaluates accuracy of interface regions in multimeric predictions (0-1, higher is better).",
    }
    # Unknown score names degrade to a neutral message instead of raising.
    return descriptions.get(score, "No description available for this score.")
263
+
264
+ gr.Markdown(
265
+ value=lambda: get_score_description(correlation_column.value),
266
+ every=correlation_column
267
+ )
268
  with gr.Column():
269
  correlation_plot = gr.Plot(label="Correlation with binding affinity")
270
 
folding_studio_demo/correlate.py CHANGED
@@ -95,9 +95,6 @@ def plot_correlation_ranking(corr_data: pd.DataFrame, correlation_type: str) ->
95
  def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: list[str], main_cols: list[str]) -> tuple[pd.DataFrame, go.Figure]:
96
  """Fake predict structures of all complexes and correlate the results."""
97
 
98
- # Rename score columns using the mapping in SCORE_COLUMN_NAMES
99
- spr_data_with_scores = spr_data_with_scores.rename(columns=SCORE_COLUMN_NAMES)
100
-
101
  corr_data = compute_correlation_data(spr_data_with_scores, score_cols)
102
  corr_ranking_plot = plot_correlation_ranking(corr_data, "Spearman")
103
 
 
95
  def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: list[str], main_cols: list[str]) -> tuple[pd.DataFrame, go.Figure]:
96
  """Fake predict structures of all complexes and correlate the results."""
97
 
 
 
 
98
  corr_data = compute_correlation_data(spr_data_with_scores, score_cols)
99
  corr_ranking_plot = plot_correlation_ranking(corr_data, "Spearman")
100