Spaces:

InstaDeepAI
/

folding-studio-demo

Running

App Files Files Community

jfaustin committed 7 days ago

Commit

d6a8f4f

1 Parent(s): c08afad

wip: explain scores

Browse files

Files changed (2) hide show

folding_studio_demo/app.py +35 -2
folding_studio_demo/correlate.py +0 -3

folding_studio_demo/app.py CHANGED Viewed

@@ -9,6 +9,7 @@ from gradio_molecule3d import Molecule3D
 from folding_studio_demo.correlate import (
     SCORE_COLUMNS,
     fake_predict_and_correlate,
     make_regression_plot,
     compute_correlation_data,
@@ -181,6 +182,7 @@ def create_correlation_tab():
         of binding strength.
     """)
     spr_data_with_scores = pd.read_csv("spr_af_scores_mapped.csv")
     prettified_columns = {
         "antibody_name": "Antibody Name",
         "KD (nM)": "KD (nM)",
@@ -213,14 +215,14 @@ def create_correlation_tab():
     with gr.Row():
         prediction_dataframe = gr.Dataframe(label="Predicted Structures Data")
     with gr.Row():
-        with gr.Column():
             correlation_type = gr.Radio(
                 choices=["Spearman", "Pearson", "R²"],
                 value="Spearman",
                 label="Correlation Type",
                 interactive=True
             )
-        with gr.Column():
             correlation_ranking_plot = gr.Plot(label="Correlation ranking")
     with gr.Row():
         with gr.Column():
@@ -232,6 +234,37 @@ def create_correlation_tab():
                 # Add checkbox for log scale and update plot when either input changes
             with gr.Row():
                 log_scale = gr.Checkbox(label="Display x-axis on logarithmic scale", value=False)
         with gr.Column():
             correlation_plot = gr.Plot(label="Correlation with binding affinity")

 from folding_studio_demo.correlate import (
     SCORE_COLUMNS,
+    SCORE_COLUMN_NAMES,
     fake_predict_and_correlate,
     make_regression_plot,
     compute_correlation_data,
         of binding strength.
     """)
     spr_data_with_scores = pd.read_csv("spr_af_scores_mapped.csv")
+    spr_data_with_scores = spr_data_with_scores.rename(columns=SCORE_COLUMN_NAMES)
     prettified_columns = {
         "antibody_name": "Antibody Name",
         "KD (nM)": "KD (nM)",
     with gr.Row():
         prediction_dataframe = gr.Dataframe(label="Predicted Structures Data")
     with gr.Row():
+        with gr.Row():
             correlation_type = gr.Radio(
                 choices=["Spearman", "Pearson", "R²"],
                 value="Spearman",
                 label="Correlation Type",
                 interactive=True
             )
+        with gr.Row():
             correlation_ranking_plot = gr.Plot(label="Correlation ranking")
     with gr.Row():
         with gr.Column():
                 # Add checkbox for log scale and update plot when either input changes
             with gr.Row():
                 log_scale = gr.Checkbox(label="Display x-axis on logarithmic scale", value=False)
+            with gr.Row():
+                def get_score_description(score: str) -> str:
+                    descriptions = {
+                        "Boltz Confidence Score": "The Boltz confidence score provides an overall assessment of prediction quality (0-1, higher is better).",
+                        "Boltz pTM Score": "The Boltz predicted TM-score (pTM) assesses the overall fold accuracy of the predicted structure (0-1, higher is better).",
+                        "Boltz ipTM Score": "The Boltz interface pTM score (ipTM) specifically evaluates the accuracy of interface regions (0-1, higher is better).",
+                        "Boltz Complex pLDDT": "The Boltz Complex pLDDT measures confidence in local structure predictions across the entire complex (0-100, higher is better).",
+                        "Boltz Complex ipLDDT": "The Boltz Complex interface pLDDT (ipLDDT) focuses on confidence in interface region predictions (0-100, higher is better).",
+                        "Boltz Complex pDE": "The Boltz Complex predicted distance error (pDE) estimates the confidence in predicted distances between residues (0-1, higher is better).",
+                        "Boltz Complex ipDE": "The Boltz Complex interface pDE (ipDE) estimates confidence in predicted distances specifically at interfaces (0-1, higher is better).",
+                        "Monomer Interchain PAE": "The monomer interchain predicted aligned error (PAE) estimates position errors between chains in monomeric predictions (lower is better).",
+                        "Monomer Interface PAE": "The monomer interface PAE estimates position errors specifically at interfaces in monomeric predictions (lower is better).",
+                        "Monomer Overall PAE": "The monomer overall PAE estimates position errors across the entire structure in monomeric predictions (lower is better).",
+                        "Monomer Interface pLDDT": "The monomer interface pLDDT measures confidence in interface region predictions for monomeric models (0-100, higher is better).",
+                        "Monomer Average pLDDT": "The monomer average pLDDT provides the mean confidence across all residues in monomeric predictions (0-100, higher is better).",
+                        "Monomer pTM Score": "The monomer pTM score assesses overall fold accuracy in monomeric predictions (0-1, higher is better).",
+                        "Monomer Interface pTM": "The monomer interface pTM specifically evaluates accuracy of interface regions in monomeric predictions (0-1, higher is better).",
+                        "Multimer Interchain PAE": "The multimer interchain PAE estimates position errors between chains in multimeric predictions (lower is better).",
+                        "Multimer Interface PAE": "The multimer interface PAE estimates position errors specifically at interfaces in multimeric predictions (lower is better).",
+                        "Multimer Overall PAE": "The multimer overall PAE estimates position errors across the entire structure in multimeric predictions (lower is better).",
+                        "Multimer Interface pLDDT": "The multimer interface pLDDT measures confidence in interface region predictions for multimeric models (0-100, higher is better).",
+                        "Multimer Average pLDDT": "The multimer average pLDDT provides the mean confidence across all residues in multimeric predictions (0-100, higher is better).",
+                        "Multimer pTM Score": "The multimer pTM score assesses overall fold accuracy in multimeric predictions (0-1, higher is better).",
+                        "Multimer Interface pTM": "The multimer interface pTM specifically evaluates accuracy of interface regions in multimeric predictions (0-1, higher is better)."
+                    }
+                    return descriptions.get(score, "No description available for this score.")
+                gr.Markdown(
+                    value=lambda: get_score_description(correlation_column.value),
+                    every=correlation_column
+                )
         with gr.Column():
             correlation_plot = gr.Plot(label="Correlation with binding affinity")

folding_studio_demo/correlate.py CHANGED Viewed

@@ -95,9 +95,6 @@ def plot_correlation_ranking(corr_data: pd.DataFrame, correlation_type: str) ->
 def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: list[str], main_cols: list[str]) -> tuple[pd.DataFrame, go.Figure]:
     """Fake predict structures of all complexes and correlate the results."""
-    # Rename score columns using the mapping in SCORE_COLUMN_NAMES
-    spr_data_with_scores = spr_data_with_scores.rename(columns=SCORE_COLUMN_NAMES)
     corr_data = compute_correlation_data(spr_data_with_scores, score_cols)
     corr_ranking_plot = plot_correlation_ranking(corr_data, "Spearman")

 def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: list[str], main_cols: list[str]) -> tuple[pd.DataFrame, go.Figure]:
     """Fake predict structures of all complexes and correlate the results."""
     corr_data = compute_correlation_data(spr_data_with_scores, score_cols)
     corr_ranking_plot = plot_correlation_ranking(corr_data, "Spearman")