Spaces:

InstaDeepAI
/

folding-studio-demo

Running

App Files Community

jfaustin committed 7 days ago

Commit

8192214

1 Parent(s): ecc0f00

choose correlation ranking plot

Browse files

Files changed (2) hide show

folding_studio_demo/app.py +29 -7
folding_studio_demo/correlate.py +50 -19

folding_studio_demo/app.py CHANGED Viewed

@@ -10,7 +10,9 @@ from gradio_molecule3d import Molecule3D
 from folding_studio_demo.correlate import (
     SCORE_COLUMNS,
     fake_predict_and_correlate,
-    make_correlation_plot,
 )
 from folding_studio_demo.predict import predict, predict_comparison
@@ -211,7 +213,15 @@ def create_correlation_tab():
     with gr.Row():
         prediction_dataframe = gr.Dataframe(label="Predicted Structures Data")
     with gr.Row():
-        correlation_ranking_plot = gr.Plot(label="Correlation ranking")
     with gr.Row():
         with gr.Column():
             with gr.Row():
@@ -229,21 +239,33 @@ def create_correlation_tab():
         fn=lambda x: fake_predict_and_correlate(
             spr_data_with_scores, SCORE_COLUMNS, ["Antibody Name", "KD (nM)"]
         ),
-        inputs=None,
         outputs=[prediction_dataframe, correlation_ranking_plot, correlation_plot],
     )
-    def update_plot(score, use_log):
-        return make_correlation_plot(spr_data_with_scores, score, use_log)
     correlation_column.change(
-        fn=update_plot,
         inputs=[correlation_column, log_scale],
         outputs=correlation_plot,
     )
     log_scale.change(
-        fn=update_plot,
         inputs=[correlation_column, log_scale],
         outputs=correlation_plot,
     )

 from folding_studio_demo.correlate import (
     SCORE_COLUMNS,
     fake_predict_and_correlate,
+    make_regression_plot,
+    compute_correlation_data,
+    plot_correlation_ranking
 )
 from folding_studio_demo.predict import predict, predict_comparison
     with gr.Row():
         prediction_dataframe = gr.Dataframe(label="Predicted Structures Data")
     with gr.Row():
+        with gr.Column():
+            correlation_type = gr.Radio(
+                choices=["Spearman", "Pearson", "R²"],
+                value="Spearman",
+                label="Correlation Type",
+                interactive=True
+            )
+        with gr.Column():
+            correlation_ranking_plot = gr.Plot(label="Correlation ranking")
     with gr.Row():
         with gr.Column():
             with gr.Row():
         fn=lambda x: fake_predict_and_correlate(
             spr_data_with_scores, SCORE_COLUMNS, ["Antibody Name", "KD (nM)"]
         ),
+        inputs=[correlation_type],
         outputs=[prediction_dataframe, correlation_ranking_plot, correlation_plot],
     )
+    def update_regression_plot(score, use_log):
+        return make_regression_plot(spr_data_with_scores, score, use_log)
+    def update_correlation_plot(correlation_type):
+        logger.info(f"Updating correlation plot for {correlation_type}")
+        corr_data = compute_correlation_data(spr_data_with_scores, SCORE_COLUMNS)
+        logger.info(f"Correlation data: {corr_data}")
+        return plot_correlation_ranking(corr_data, correlation_type)
     correlation_column.change(
+        fn=update_regression_plot,
         inputs=[correlation_column, log_scale],
         outputs=correlation_plot,
     )
+    correlation_type.change(
+        fn=update_correlation_plot,
+        inputs=[correlation_type],
+        outputs=correlation_ranking_plot,
+    )
     log_scale.change(
+        fn=update_regression_plot,
         inputs=[correlation_column, log_scale],
         outputs=correlation_plot,
     )

folding_studio_demo/correlate.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import logging
 import pandas as pd
 import numpy as np
 import plotly.graph_objects as go
-from scipy.stats import spearmanr
 logger = logging.getLogger(__name__)
@@ -30,16 +31,32 @@ SCORE_COLUMNS = [
         "interface_ptm_multimer"
     ]
-def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: list[str], main_cols: list[str]) -> tuple[pd.DataFrame, go.Figure]:
-    """Fake predict structures of all complexes and correlate the results."""
     corr_data = []
     spr_data_with_scores["log_kd"] = np.log10(spr_data_with_scores["KD (nM)"])
     kd_col = "KD (nM)"
-    for score_col in score_cols:
-        logger.info(f"Computing correlation between {score_col} and KD (nM)")
-        res = spearmanr(spr_data_with_scores[kd_col], spr_data_with_scores[score_col])
-        corr_data.append({"score": score_col, "correlation": res.statistic, "p-value": res.pvalue})
-        logger.info(f"Correlation between {score_col} and KD (nM): {res.statistic}")
     corr_data = pd.DataFrame(corr_data)
     # Find the lines in corr_data with NaN values and remove them
@@ -47,34 +64,48 @@ def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: l
     # Sort correlation data by correlation value
     corr_data = corr_data.sort_values('correlation', ascending=True)
     # Create bar plot of correlations
     corr_ranking_plot = go.Figure(data=[
         go.Bar(
-            x=corr_data["correlation"],
-            y=corr_data["score"],
-            name="correlation",
             orientation='h',
             hovertemplate="<i>Score:</i> %{y}<br><i>Correlation:</i> %{x:.3f}<br>"
         )
     ])
     corr_ranking_plot.update_layout(
         title="Correlation with Binding Affinity",
-        yaxis_title="Score Type",
-        xaxis_title="Spearman Correlation",
         template="simple_white",
         showlegend=False
     )
     cols_to_show = main_cols[:]
     cols_to_show.extend(score_cols)
-    corr_plot = make_correlation_plot(spr_data_with_scores, score_cols[0], use_log=False)
     return spr_data_with_scores[cols_to_show].round(2), corr_ranking_plot, corr_plot
-def make_correlation_plot(spr_data_with_scores: pd.DataFrame, score: str, use_log: bool) -> go.Figure:
-    """Select the correlation plot to display."""
-    # corr_plot is a scatter plot of the correlation between the binding affinity and each of the scores
     scatter =  go.Scatter(
             x=spr_data_with_scores["KD (nM)"],
             y=spr_data_with_scores[score],
@@ -97,11 +128,11 @@ def make_correlation_plot(spr_data_with_scores: pd.DataFrame, score: str, use_lo
         ),
         xaxis_type="log" if use_log else "linear"  # Set x-axis to logarithmic scale
     )
-    # compute the correlation line
     corr_line = np.polyfit(spr_data_with_scores["KD (nM)"], spr_data_with_scores[score], 1)
     corr_line_x = np.linspace(min(spr_data_with_scores["KD (nM)"]), max(spr_data_with_scores["KD (nM)"]), 100)
     corr_line_y = corr_line[0] * corr_line_x + corr_line[1]
-    # add the correlation line to the plot
     corr_plot.add_trace(go.Scatter(
         x=corr_line_x,
         y=corr_line_y,

 import logging
 import pandas as pd
+from pathlib import Path
 import numpy as np
 import plotly.graph_objects as go
+from scipy.stats import spearmanr, pearsonr, linregress
 logger = logging.getLogger(__name__)
         "interface_ptm_multimer"
     ]
+def compute_correlation_data(spr_data_with_scores: pd.DataFrame, score_cols: list[str]) -> pd.DataFrame:
+    corr_data_file = Path("corr_data.csv")
+    if corr_data_file.exists():
+        logger.info(f"Loading correlation data from {corr_data_file}")
+        return pd.read_csv(corr_data_file)
     corr_data = []
     spr_data_with_scores["log_kd"] = np.log10(spr_data_with_scores["KD (nM)"])
     kd_col = "KD (nM)"
+    corr_funcs = {}
+    corr_funcs["Spearman"] = spearmanr
+    corr_funcs["Pearson"] = pearsonr
+    corr_funcs["R²"] = linregress
+    for correlation_type, corr_func in corr_funcs.items():
+        for score_col in score_cols:
+            logger.info(f"Computing {correlation_type} correlation between {score_col} and KD (nM)")
+            res = corr_func(spr_data_with_scores[kd_col], spr_data_with_scores[score_col])
+            logger.info(f"Correlation function: {corr_func}")
+            correlation_value = res.rvalue**2 if correlation_type == "R²" else res.statistic
+            corr_data.append({
+                "correlation_type": correlation_type,
+                "score": score_col,
+                "correlation": correlation_value,
+                "p-value": res.pvalue
+            })
+            logger.info(f"Correlation {correlation_type} between {score_col} and KD (nM): {correlation_value}")
     corr_data = pd.DataFrame(corr_data)
     # Find the lines in corr_data with NaN values and remove them
     # Sort correlation data by correlation value
     corr_data = corr_data.sort_values('correlation', ascending=True)
+    corr_data.to_csv("corr_data.csv", index=False)
+    return corr_data
def plot_correlation_ranking(corr_data: pd.DataFrame, correlation_type: str) -> go.Figure:
    """Build the horizontal bar chart ranking scores for one correlation type.

    Args:
        corr_data: Table with at least the columns "correlation_type",
            "score" and "correlation".
        correlation_type: Value of the "correlation_type" column to keep;
            it is also used as the trace name and as the x-axis title.

    Returns:
        A figure holding one horizontal bar per selected row, in the row
        order of ``corr_data``.
    """
    # Keep only the rows computed with the requested correlation measure.
    selected = corr_data[corr_data["correlation_type"] == correlation_type]
    values = selected["correlation"]
    scores = selected["score"]

    ranking_bars = go.Bar(
        x=values,
        y=scores,
        name=correlation_type,
        text=values,
        orientation='h',
        hovertemplate="<i>Score:</i> %{y}<br><i>Correlation:</i> %{x:.3f}<br>",
    )

    fig = go.Figure(data=[ranking_bars])
    fig.update_layout(
        title="Correlation with Binding Affinity",
        yaxis_title="Score",
        xaxis_title=correlation_type,
        template="simple_white",
        showlegend=False,
    )
    return fig
def fake_predict_and_correlate(
    spr_data_with_scores: pd.DataFrame,
    score_cols: list[str],
    main_cols: list[str],
    correlation_type: str = "Spearman",
) -> tuple[pd.DataFrame, go.Figure, go.Figure]:
    """Fake predict structures of all complexes and correlate the results.

    Args:
        spr_data_with_scores: Table holding the main columns and one column
            per score.
        score_cols: Names of the score columns; the first one is used for
            the initial regression plot.
        main_cols: Names of the columns shown before the score columns.
            The list is not modified.
        correlation_type: Correlation measure shown in the ranking plot.
            Defaults to "Spearman", which was previously hard-coded, so
            existing callers are unaffected.

    Returns:
        A 3-tuple of:
        - the ``main_cols`` + ``score_cols`` columns of the input, rounded
          to 2 decimals,
        - the correlation ranking bar plot for ``correlation_type``,
        - the regression plot for ``score_cols[0]`` on a linear x-axis.
    """
    corr_data = compute_correlation_data(spr_data_with_scores, score_cols)
    corr_ranking_plot = plot_correlation_ranking(corr_data, correlation_type)

    # Build a fresh list so the caller's main_cols is never mutated.
    cols_to_show = [*main_cols, *score_cols]

    corr_plot = make_regression_plot(spr_data_with_scores, score_cols[0], use_log=False)
    return spr_data_with_scores[cols_to_show].round(2), corr_ranking_plot, corr_plot
+def make_regression_plot(spr_data_with_scores: pd.DataFrame, score: str, use_log: bool) -> go.Figure:
+    """Select the regression plot to display."""
+    # corr_plot is a scatter plot of the regression between the binding affinity and each of the scores
     scatter =  go.Scatter(
             x=spr_data_with_scores["KD (nM)"],
             y=spr_data_with_scores[score],
         ),
         xaxis_type="log" if use_log else "linear"  # Set x-axis to logarithmic scale
     )
+    # compute the regression line
     corr_line = np.polyfit(spr_data_with_scores["KD (nM)"], spr_data_with_scores[score], 1)
     corr_line_x = np.linspace(min(spr_data_with_scores["KD (nM)"]), max(spr_data_with_scores["KD (nM)"]), 100)
     corr_line_y = corr_line[0] * corr_line_x + corr_line[1]
+    # add the regression line to the plot
     corr_plot.add_trace(go.Scatter(
         x=corr_line_x,
         y=corr_line_y,