wip: explain scores
Browse files
folding_studio_demo/app.py
CHANGED
@@ -9,6 +9,7 @@ from gradio_molecule3d import Molecule3D
|
|
9 |
|
10 |
from folding_studio_demo.correlate import (
|
11 |
SCORE_COLUMNS,
|
|
|
12 |
fake_predict_and_correlate,
|
13 |
make_regression_plot,
|
14 |
compute_correlation_data,
|
@@ -181,6 +182,7 @@ def create_correlation_tab():
|
|
181 |
of binding strength.
|
182 |
""")
|
183 |
spr_data_with_scores = pd.read_csv("spr_af_scores_mapped.csv")
|
|
|
184 |
prettified_columns = {
|
185 |
"antibody_name": "Antibody Name",
|
186 |
"KD (nM)": "KD (nM)",
|
@@ -213,14 +215,14 @@ def create_correlation_tab():
|
|
213 |
with gr.Row():
|
214 |
prediction_dataframe = gr.Dataframe(label="Predicted Structures Data")
|
215 |
with gr.Row():
|
216 |
-
with gr.
|
217 |
correlation_type = gr.Radio(
|
218 |
choices=["Spearman", "Pearson", "R²"],
|
219 |
value="Spearman",
|
220 |
label="Correlation Type",
|
221 |
interactive=True
|
222 |
)
|
223 |
-
with gr.
|
224 |
correlation_ranking_plot = gr.Plot(label="Correlation ranking")
|
225 |
with gr.Row():
|
226 |
with gr.Column():
|
@@ -232,6 +234,37 @@ def create_correlation_tab():
|
|
232 |
# Add checkbox for log scale and update plot when either input changes
|
233 |
with gr.Row():
|
234 |
log_scale = gr.Checkbox(label="Display x-axis on logarithmic scale", value=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
with gr.Column():
|
236 |
correlation_plot = gr.Plot(label="Correlation with binding affinity")
|
237 |
|
|
|
9 |
|
10 |
from folding_studio_demo.correlate import (
|
11 |
SCORE_COLUMNS,
|
12 |
+
SCORE_COLUMN_NAMES,
|
13 |
fake_predict_and_correlate,
|
14 |
make_regression_plot,
|
15 |
compute_correlation_data,
|
|
|
182 |
of binding strength.
|
183 |
""")
|
184 |
spr_data_with_scores = pd.read_csv("spr_af_scores_mapped.csv")
|
185 |
+
spr_data_with_scores = spr_data_with_scores.rename(columns=SCORE_COLUMN_NAMES)
|
186 |
prettified_columns = {
|
187 |
"antibody_name": "Antibody Name",
|
188 |
"KD (nM)": "KD (nM)",
|
|
|
215 |
with gr.Row():
|
216 |
prediction_dataframe = gr.Dataframe(label="Predicted Structures Data")
|
217 |
with gr.Row():
|
218 |
+
with gr.Row():
|
219 |
correlation_type = gr.Radio(
|
220 |
choices=["Spearman", "Pearson", "R²"],
|
221 |
value="Spearman",
|
222 |
label="Correlation Type",
|
223 |
interactive=True
|
224 |
)
|
225 |
+
with gr.Row():
|
226 |
correlation_ranking_plot = gr.Plot(label="Correlation ranking")
|
227 |
with gr.Row():
|
228 |
with gr.Column():
|
|
|
234 |
# Add checkbox for log scale and update plot when either input changes
|
235 |
with gr.Row():
|
236 |
log_scale = gr.Checkbox(label="Display x-axis on logarithmic scale", value=False)
|
237 |
+
with gr.Row():
|
238 |
+
def get_score_description(score: str) -> str:
    """Return a one-sentence, user-facing explanation of a score column.

    Args:
        score: Display name of the score column (e.g. "Boltz pTM Score").

    Returns:
        The matching description, or a generic fallback sentence when the
        name is not one of the known score columns.
    """
    # Keys are the prettified column names shown in the UI.
    # NOTE(review): the pDE / ipDE entries previously read "0-1, higher is
    # better"; they are error estimates, so they now follow the same
    # "lower is better" convention as the PAE entries below.
    # NOTE(review): the 0-100 range quoted for the Boltz pLDDT entries should
    # be confirmed against the scale actually stored in the data.
    descriptions = {
        "Boltz Confidence Score": "The Boltz confidence score provides an overall assessment of prediction quality (0-1, higher is better).",
        "Boltz pTM Score": "The Boltz predicted TM-score (pTM) assesses the overall fold accuracy of the predicted structure (0-1, higher is better).",
        "Boltz ipTM Score": "The Boltz interface pTM score (ipTM) specifically evaluates the accuracy of interface regions (0-1, higher is better).",
        "Boltz Complex pLDDT": "The Boltz Complex pLDDT measures confidence in local structure predictions across the entire complex (0-100, higher is better).",
        "Boltz Complex ipLDDT": "The Boltz Complex interface pLDDT (ipLDDT) focuses on confidence in interface region predictions (0-100, higher is better).",
        "Boltz Complex pDE": "The Boltz Complex predicted distance error (pDE) estimates the error in predicted distances between residues (lower is better).",
        "Boltz Complex ipDE": "The Boltz Complex interface pDE (ipDE) estimates the error in predicted distances specifically at interfaces (lower is better).",
        "Monomer Interchain PAE": "The monomer interchain predicted aligned error (PAE) estimates position errors between chains in monomeric predictions (lower is better).",
        "Monomer Interface PAE": "The monomer interface PAE estimates position errors specifically at interfaces in monomeric predictions (lower is better).",
        "Monomer Overall PAE": "The monomer overall PAE estimates position errors across the entire structure in monomeric predictions (lower is better).",
        "Monomer Interface pLDDT": "The monomer interface pLDDT measures confidence in interface region predictions for monomeric models (0-100, higher is better).",
        "Monomer Average pLDDT": "The monomer average pLDDT provides the mean confidence across all residues in monomeric predictions (0-100, higher is better).",
        "Monomer pTM Score": "The monomer pTM score assesses overall fold accuracy in monomeric predictions (0-1, higher is better).",
        "Monomer Interface pTM": "The monomer interface pTM specifically evaluates accuracy of interface regions in monomeric predictions (0-1, higher is better).",
        "Multimer Interchain PAE": "The multimer interchain PAE estimates position errors between chains in multimeric predictions (lower is better).",
        "Multimer Interface PAE": "The multimer interface PAE estimates position errors specifically at interfaces in multimeric predictions (lower is better).",
        "Multimer Overall PAE": "The multimer overall PAE estimates position errors across the entire structure in multimeric predictions (lower is better).",
        "Multimer Interface pLDDT": "The multimer interface pLDDT measures confidence in interface region predictions for multimeric models (0-100, higher is better).",
        "Multimer Average pLDDT": "The multimer average pLDDT provides the mean confidence across all residues in multimeric predictions (0-100, higher is better).",
        "Multimer pTM Score": "The multimer pTM score assesses overall fold accuracy in multimeric predictions (0-1, higher is better).",
        "Multimer Interface pTM": "The multimer interface pTM specifically evaluates accuracy of interface regions in multimeric predictions (0-1, higher is better).",
    }
    # Unknown names fall back to a fixed sentence instead of raising.
    return descriptions.get(score, "No description available for this score.")
|
263 |
+
|
264 |
+
# Show the description of the currently selected score column.
# `every=` expects a timer / polling interval, not a component, and reading
# `correlation_column.value` only yields the value the component was built
# with. Passing the component through `inputs=` makes Gradio call the
# function with the live selection and recompute it on every change.
gr.Markdown(
    value=get_score_description,
    inputs=correlation_column,
)
|
268 |
with gr.Column():
|
269 |
correlation_plot = gr.Plot(label="Correlation with binding affinity")
|
270 |
|
folding_studio_demo/correlate.py
CHANGED
@@ -95,9 +95,6 @@ def plot_correlation_ranking(corr_data: pd.DataFrame, correlation_type: str) ->
|
|
95 |
def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: list[str], main_cols: list[str]) -> tuple[pd.DataFrame, go.Figure]:
|
96 |
"""Fake predict structures of all complexes and correlate the results."""
|
97 |
|
98 |
-
# Rename score columns using the mapping in SCORE_COLUMN_NAMES
|
99 |
-
spr_data_with_scores = spr_data_with_scores.rename(columns=SCORE_COLUMN_NAMES)
|
100 |
-
|
101 |
corr_data = compute_correlation_data(spr_data_with_scores, score_cols)
|
102 |
corr_ranking_plot = plot_correlation_ranking(corr_data, "Spearman")
|
103 |
|
|
|
95 |
def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: list[str], main_cols: list[str]) -> tuple[pd.DataFrame, go.Figure]:
|
96 |
"""Fake predict structures of all complexes and correlate the results."""
|
97 |
|
|
|
|
|
|
|
98 |
corr_data = compute_correlation_data(spr_data_with_scores, score_cols)
|
99 |
corr_ranking_plot = plot_correlation_ranking(corr_data, "Spearman")
|
100 |
|