Spaces:

colbyford
/

esm3

Runtime error

App Files Files Community

Fix: Resolve runtime errors and Add: MCP support

by atalaydenknalbant - opened 7 days ago

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+153

-59

Files changed (2) hide show

README.md +1 -1
app.py +152 -58

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🧬
 colorFrom: gray
 colorTo: green
 sdk: gradio
-sdk_version: 4.37.1
 app_file: app.py
 pinned: true
 short_description: A frontier generative model for biology by EvolutionaryScale

 colorFrom: gray
 colorTo: green
 sdk: gradio
+sdk_version: 5.38.2
 app_file: app.py
 pinned: true
 short_description: A frontier generative model for biology by EvolutionaryScale

app.py CHANGED Viewed

@@ -1,11 +1,11 @@
 import gradio as gr
 import numpy as np
 import os, tempfile
 import torch
 import py3Dmol
 from huggingface_hub import login
-# import spaces
 from esm.utils.structure.protein_chain import ProteinChain
 from esm.models.esm3 import ESM3
@@ -16,60 +16,107 @@ from esm.sdk.api import (
 from gradio_molecule3d import Molecule3D
 theme = gr.themes.Monochrome(
     primary_hue="gray",
 )
-## Function to get model from Hugging Face using token
-# @spaces.GPU()
 def get_model(model_name, token):
-    login(token=token)
-    if torch.cuda.is_available():
-        model = ESM3.from_pretrained(model_name, device=torch.device("cuda"))
-    else:
-        model = ESM3.from_pretrained(model_name, device=torch.device("cpu"))
     # model = ESM3.from_pretrained(model_name, device=torch.device("cpu"))
     return model
-## Function to get PDB data
 def get_pdb(pdb_id, chain_id):
     pdb = ProteinChain.from_rcsb(pdb_id, chain_id)
     # return [pdb.sequence, render_pdb(pdb.to_pdb_string())]
     return pdb
-## Function to generate rep for 3D structure
 def make_reps(res_start=None, res_end=None, main_color="whiteCarbon", highlight_color="redCarbon", main_style="cartoon", highlight_style="cartoon"):
     residue_range = f"{res_start}-{res_end}" if res_start != res_end else ""
     return [
-     {
-        "model": 0,
-        "chain": "",
-        "resname": "",
-        "style": main_style,
-        "color": main_color,
-        "residue_range": "",
-        "around": 0,
-        "byres": False,
-        "visible": True
-    },
-    {
-        "model": 0,
-        "chain": "",
-        "resname": "",
-        "style": highlight_style,
-        "color": highlight_color,
-        "residue_range": residue_range,
-        "around": 0,
-        "byres": False,
-        "visible": True
-    }]
-## Function to render 3D structure
 def render_pdb(pdb_id, chain_id, res_start, res_end, pdb_string=None):
     if pdb_string is None:
         pdb_string = get_pdb(pdb_id, chain_id).to_pdb_string()
     ## Write to temporary file and read back in to get the 3D structure
@@ -78,8 +125,25 @@ def render_pdb(pdb_id, chain_id, res_start, res_end, pdb_string=None):
     return Molecule3D(tmp_pdb.name, reps=make_reps(res_start=res_start, res_end=res_end))
-## Function for Scaffolding
-def scaffold(model_name, token, pdb_id, chain_id, motif_start, motif_end, prompt_length, insert_size):
     pdb = get_pdb(pdb_id, chain_id)
     ## Get motif sequence and atom37 positions
@@ -102,7 +166,7 @@ def scaffold(model_name, token, pdb_id, chain_id, motif_start, motif_end, prompt
                                                   num_steps=sequence_prompt.count("_") // 2,
                                                   temperature=0.5)
     ## Generate sequence
-    model = get_model(model_name, token)
     sequence_generation = model.generate(protein_prompt, sequence_generation_config)
     generated_sequence = sequence_generation.sequence
@@ -110,7 +174,7 @@ def scaffold(model_name, token, pdb_id, chain_id, motif_start, motif_end, prompt
     structure_prediction_config = GenerationConfig(
         track="structure", # We want ESM3 to generate tokens for the structure track
         num_steps=len(sequence_generation) // 8,
-        temperature=0.7,
     )
     structure_prediction_prompt = ESMProtein(sequence=sequence_generation.sequence)
     structure_prediction = model.generate(structure_prediction_prompt, structure_prediction_config)
@@ -121,14 +185,14 @@ def scaffold(model_name, token, pdb_id, chain_id, motif_start, motif_end, prompt
     # crmsd = structure_prediction_chain.rmsd(renal_dipep_chain, mobile_inds=motif_inds_in_generation, target_inds=motif_inds)
     structure_orig_highlight = render_pdb(pdb_id, chain_id, res_start=motif_start, res_end=motif_end)
-    structure_new_highlight = render_pdb(pdb_id, chain_id, res_start=insert_size, res_end=insert_size+len(motif_sequence),
                                          pdb_string=structure_prediction_chain.to_pdb_string())
     return [
         pdb.sequence,
         motif_sequence,
         structure_orig_highlight,
-        # motif_atom37_positions,
         sequence_prompt,
         # structure_prompt,
         # protein_prompt
@@ -138,8 +202,23 @@ def scaffold(model_name, token, pdb_id, chain_id, motif_start, motif_end, prompt
         structure_new_highlight
     ]
-## Function for Secondary Structure Editing
-def ss_edit(model_name, token, pdb_id, chain_id, region_start, region_end, shortened_region_length, shortening_ss8):
     pdb = get_pdb(pdb_id, chain_id)
     edit_region = np.arange(region_start, region_end)
@@ -148,21 +227,21 @@ def ss_edit(model_name, token, pdb_id, chain_id, region_start, region_end, short
     ## Construct a secondary structure prompt that retains the secondary structure of the flanking regions, and shortens the lengths of helices in the helix-coil-helix region
     ss8_prompt = shortening_ss8[:edit_region[0]] + (((shortened_region_length - 3) // 2) * "H" + "C"*3 + ((shortened_region_length - 3) // 2) * "H") + shortening_ss8[edit_region[-1] + 1:]
     ## Save original sequence and secondary structure
     original_sequence = pdb.sequence
     original_ss8 = shortening_ss8
     original_ss8_region = " "*edit_region[0] + shortening_ss8[edit_region[0]:edit_region[-1]+1]
     proposed_ss8_region = " "*edit_region[0] + ss8_prompt[edit_region[0]:edit_region[0]+shortened_region_length]
     ## Create protein prompt
     protein_prompt = ESMProtein(sequence=sequence_prompt, secondary_structure=ss8_prompt)
     ## Generate sequence
-    model = get_model(model_name, token)
     sequence_generation = model.generate(protein_prompt, GenerationConfig(track="sequence", num_steps=protein_prompt.sequence.count("_") // 2, temperature=0.5))
     ## Generate structure
     structure_prediction = model.generate(ESMProtein(sequence=sequence_generation.sequence), GenerationConfig(track="structure", num_steps=len(protein_prompt) // 4, temperature=0))
     structure_prediction_chain = structure_prediction.to_protein_chain()
@@ -184,19 +263,34 @@ def ss_edit(model_name, token, pdb_id, chain_id, region_start, region_end, short
         structure_new_highlight
         ]
-## Function for SASA Editing
-def sasa_edit(model_name, token, pdb_id, chain_id, span_start, span_end, n_samples):
     pdb = get_pdb(pdb_id, chain_id)
     structure_prompt = torch.full((len(pdb), 37, 3), torch.nan)
-    structure_prompt[span_start:span_end] = torch.tensor(pdb[span_start:span_end].atom37_positions, dtype=torch.float32)
     sasa_prompt = [None]*len(pdb)
     sasa_prompt[span_start:span_end] = [40.0]*(span_end - span_start)
     protein_prompt = ESMProtein(sequence="_"*len(pdb), coordinates=structure_prompt, sasa=sasa_prompt)
-    model = get_model(model_name, token)
     generated_proteins = []
     for i in range(n_samples):
@@ -227,7 +321,7 @@ scaffold_app = gr.Interface(
     fn=scaffold,
     inputs=[
         gr.Dropdown(label="Model Name", choices=["esm3_sm_open_v1"], value="esm3_sm_open_v1", allow_custom_value=True),
-        gr.Textbox(value = "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", label="Hugging Face Token", type="password"),
         gr.Textbox(value="1ITU", label = "PDB Code"),
         gr.Textbox(value="A", label = "Chain"),
         gr.Number(value=123, label="Motif Start"),
@@ -253,7 +347,7 @@ ss_app = gr.Interface(
     fn=ss_edit,
     inputs=[
         gr.Dropdown(label="Model Name", choices=["esm3_sm_open_v1"], value="esm3_sm_open_v1", allow_custom_value=True),
-        gr.Textbox(value = "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", label="Hugging Face Token", type="password"),
         gr.Textbox(value = "7XBQ", label="PDB ID"),
         gr.Textbox(value = "A", label="Chain ID"),
         gr.Number(value=38, label="Edit Region Start"),
@@ -280,12 +374,12 @@ sasa_app = gr.Interface(
     fn=sasa_edit,
     inputs=[
         gr.Dropdown(label="Model Name", choices=["esm3_sm_open_v1"], value="esm3_sm_open_v1", allow_custom_value=True),
-        gr.Textbox(value = "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", label="Hugging Face Token", type="password"),
         gr.Textbox(value = "1LBS", label="PDB ID"),
         gr.Textbox(value = "A", label="Chain ID"),
         gr.Number(value=105, label="Span Start"),
         gr.Number(value=116, label="Span End"),
-        # gr.Textbox(value="CCSSCCCCSSCHHHHHHTEEETTBBTTBCSSEEEEECCTTCCHHHHHTTTHHHHHHHTTCEEEEECCTTTTCSCHHHHHHHHHHHHHHHHHHTTSCCEEEEEETHHHHHHHHHHHHCGGGGGTEEEEEEESCCTTCBGGGHHHHHTTCBCHHHHHTBTTCHHHHHHHHTTTTBCSSCEEEEECTTCSSSCCCCSSSTTSTTCCBTSEEEEHHHHHCTTCCCCSHHHHHBHHHHHHHHHHHHCTTSSCCGGGCCSTTCCCSBCTTSCHHHHHHHHSTHHHHHHHHHHSCCBSSCCCCCGGGGGGSTTCEETTEECCC", label="SS8 String")
         gr.Number(value=1, label="Number of Samples")
     ],
     outputs = [
@@ -323,9 +417,9 @@ with gr.Blocks(theme=theme) as esm_app:
             - GitHub: https://github.com/evolutionaryscale/esm
             - HuggingFace Model: https://huggingface.co/EvolutionaryScale/esm3-sm-open-v1
-            Spaces App By: [[Colby T. Ford](https://colbyford.com)] from [Tuple, The Cloud Genomics Company](https://tuple.xyz)
-            NOTE: You will need to agree to EvolutionaryScale's [license agreement](https://huggingface.co/EvolutionaryScale/esm3-sm-open-v1) to use the model. Then, create and paste your HuggingFace token in the appropriate field.
             """
         )
     with gr.Row():
@@ -343,4 +437,4 @@ with gr.Blocks(theme=theme) as esm_app:
             ])
 if __name__ == "__main__":
-    esm_app.launch()

+import spaces
 import gradio as gr
 import numpy as np
 import os, tempfile
 import torch
 import py3Dmol
 from huggingface_hub import login
 from esm.utils.structure.protein_chain import ProteinChain
 from esm.models.esm3 import ESM3
 from gradio_molecule3d import Molecule3D
+# --- Retrieve the HF token from the Space's secrets ---
+HF_TOKEN = os.getenv("HF_TOKEN")
 theme = gr.themes.Monochrome(
     primary_hue="gray",
 )
 def get_model(model_name, token):
     """
     Logs into Hugging Face (when a token is supplied) and loads an ESM3 model.
     Args:
         model_name (str): The name of the model to load from Hugging Face.
         token (str): The Hugging Face authentication token. A falsy value
                      (e.g. an unset HF_TOKEN secret) skips the login step.
     Returns:
         ESM3: The loaded ESM3 model, placed on the CUDA device when one is
               available and on the CPU otherwise.
     """
     if token:
         login(token=token)
     # Fall back to the CPU so the model can still be loaded on hosts without
     # a GPU instead of failing while moving weights to a missing CUDA device.
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     return ESM3.from_pretrained(model_name, device=device)
 def get_pdb(pdb_id, chain_id):
     """
     Loads a single chain of an RCSB entry via ProteinChain.from_rcsb.
     Args:
         pdb_id (str): The PDB identifier passed to ProteinChain.from_rcsb.
         chain_id (str): The chain identifier passed to ProteinChain.from_rcsb.
     Returns:
         ProteinChain: The object returned by ProteinChain.from_rcsb.
     """
     return ProteinChain.from_rcsb(pdb_id, chain_id)
 def make_reps(res_start=None, res_end=None, main_color="whiteCarbon", highlight_color="redCarbon", main_style="cartoon", highlight_style="cartoon"):
     """
     Builds the two representation dictionaries passed as `reps` to Molecule3D.
     The first entry styles the whole structure; the second styles the
     highlighted residue span.
     Args:
         res_start (int, optional): First residue of the highlighted span. Defaults to None.
         res_end (int, optional): Last residue of the highlighted span. Defaults to None.
         main_color (str, optional): Color of the whole-structure entry. Defaults to "whiteCarbon".
         highlight_color (str, optional): Color of the highlight entry. Defaults to "redCarbon".
         main_style (str, optional): Style of the whole-structure entry. Defaults to "cartoon".
         highlight_style (str, optional): Style of the highlight entry. Defaults to "cartoon".
     Returns:
         list: Two dictionaries, the whole-structure entry followed by the highlight entry.
     """
     def build_rep(style, color, span):
         # Both entries share every field except style, color and residue span.
         return {
             "model": 0,
             "chain": "",
             "resname": "",
             "style": style,
             "color": color,
             "residue_range": span,
             "around": 0,
             "byres": False,
             "visible": True,
         }

     # Equal bounds (including the None/None default) produce an empty range string.
     if res_start == res_end:
         highlighted_span = ""
     else:
         highlighted_span = f"{res_start}-{res_end}"

     return [
         build_rep(main_style, main_color, ""),
         build_rep(highlight_style, highlight_color, highlighted_span),
     ]
 def render_pdb(pdb_id, chain_id, res_start, res_end, pdb_string=None):
+    """
+    Renders a PDB structure for display in a Gradio Molecule3D component.
+    Args:
+        pdb_id (str): The PDB ID, used for file naming.
+        chain_id (str): The chain ID, used for file naming.
+        res_start (int): The starting residue for highlighting.
+        res_end (int): The ending residue for highlighting.
+        pdb_string (str, optional): A string containing PDB data. If None,
+                                    it will be fetched from RCSB. Defaults to None.
+    Returns:
+        Molecule3D: A Gradio component object for 3D visualization.
+    """
     if pdb_string is None:
         pdb_string = get_pdb(pdb_id, chain_id).to_pdb_string()
     ## Write to temporary file and read back in to get the 3D structure
     return Molecule3D(tmp_pdb.name, reps=make_reps(res_start=res_start, res_end=res_end))
+@spaces.GPU()
+def scaffold(model_name, pdb_id, chain_id, motif_start, motif_end, prompt_length, insert_size):
+    """
+    Performs protein scaffolding by generating a new protein structure around a
+    functional motif from an existing protein.
+    Args:
+        model_name (str): The ESM3 model to use.
+        pdb_id (str): PDB ID of the source protein.
+        chain_id (str): Chain of the source protein.
+        motif_start (int): Starting residue of the motif.
+        motif_end (int): Ending residue of the motif.
+        prompt_length (int): Total length of the new protein sequence.
+        insert_size (int): Position where the motif will be inserted.
+    Returns:
+        list: A list of outputs for the Gradio interface, including sequences
+              and 3D structures.
+    """
     pdb = get_pdb(pdb_id, chain_id)
     ## Get motif sequence and atom37 positions
                                                   num_steps=sequence_prompt.count("_") // 2,
                                                   temperature=0.5)
     ## Generate sequence
+    model = get_model(model_name, HF_TOKEN)
     sequence_generation = model.generate(protein_prompt, sequence_generation_config)
     generated_sequence = sequence_generation.sequence
     structure_prediction_config = GenerationConfig(
         track="structure", # We want ESM3 to generate tokens for the structure track
         num_steps=len(sequence_generation) // 8,
+        temperature=0.7,
     )
     structure_prediction_prompt = ESMProtein(sequence=sequence_generation.sequence)
     structure_prediction = model.generate(structure_prediction_prompt, structure_prediction_config)
     # crmsd = structure_prediction_chain.rmsd(renal_dipep_chain, mobile_inds=motif_inds_in_generation, target_inds=motif_inds)
     structure_orig_highlight = render_pdb(pdb_id, chain_id, res_start=motif_start, res_end=motif_end)
+    structure_new_highlight = render_pdb(pdb_id, chain_id, res_start=insert_size, res_end=insert_size+len(motif_sequence),
                                          pdb_string=structure_prediction_chain.to_pdb_string())
     return [
         pdb.sequence,
         motif_sequence,
         structure_orig_highlight,
+        # gr.Textbox(label="Motif Positions")
         sequence_prompt,
         # structure_prompt,
         # protein_prompt
         structure_new_highlight
     ]
+@spaces.GPU()
+def ss_edit(model_name, pdb_id, chain_id, region_start, region_end, shortened_region_length, shortening_ss8):
+    """
+    Edits the secondary structure of a protein, for example, by shortening a helix.
+    Args:
+        model_name (str): The ESM3 model to use.
+        pdb_id (str): PDB ID of the source protein.
+        chain_id (str): Chain of the source protein.
+        region_start (int): Starting residue of the region to edit.
+        region_end (int): Ending residue of the region to edit.
+        shortened_region_length (int): The new length of the edited region.
+        shortening_ss8 (str): The 8-state secondary structure string for the original protein.
+    Returns:
+        list: A list of outputs for the Gradio interface.
+    """
     pdb = get_pdb(pdb_id, chain_id)
     edit_region = np.arange(region_start, region_end)
     ## Construct a secondary structure prompt that retains the secondary structure of the flanking regions, and shortens the lengths of helices in the helix-coil-helix region
     ss8_prompt = shortening_ss8[:edit_region[0]] + (((shortened_region_length - 3) // 2) * "H" + "C"*3 + ((shortened_region_length - 3) // 2) * "H") + shortening_ss8[edit_region[-1] + 1:]
     ## Save original sequence and secondary structure
     original_sequence = pdb.sequence
     original_ss8 = shortening_ss8
     original_ss8_region = " "*edit_region[0] + shortening_ss8[edit_region[0]:edit_region[-1]+1]
     proposed_ss8_region = " "*edit_region[0] + ss8_prompt[edit_region[0]:edit_region[0]+shortened_region_length]
     ## Create protein prompt
     protein_prompt = ESMProtein(sequence=sequence_prompt, secondary_structure=ss8_prompt)
     ## Generate sequence
+    model = get_model(model_name, HF_TOKEN)
     sequence_generation = model.generate(protein_prompt, GenerationConfig(track="sequence", num_steps=protein_prompt.sequence.count("_") // 2, temperature=0.5))
     ## Generate structure
     structure_prediction = model.generate(ESMProtein(sequence=sequence_generation.sequence), GenerationConfig(track="structure", num_steps=len(protein_prompt) // 4, temperature=0))
     structure_prediction_chain = structure_prediction.to_protein_chain()
         structure_new_highlight
         ]
+@spaces.GPU()
+def sasa_edit(model_name, pdb_id, chain_id, span_start, span_end, n_samples):
+    """
+    Edits a protein region to increase its solvent-accessible surface area (SASA).
+    Args:
+        model_name (str): The ESM3 model to use.
+        pdb_id (str): PDB ID of the source protein.
+        chain_id (str): Chain of the source protein.
+        span_start (int): Starting residue of the region to edit.
+        span_end (int): Ending residue of the region to edit.
+        n_samples (int): The number of new designs to generate.
+    Returns:
+        list: A list of outputs for the Gradio interface, including the best
+              generated structure.
+    """
     pdb = get_pdb(pdb_id, chain_id)
     structure_prompt = torch.full((len(pdb), 37, 3), torch.nan)
+    structure_prompt[span_start:span_end] = torch.tensor(pdb[span_start:span_end].atom37_positions, dtype=torch.float32)
     sasa_prompt = [None]*len(pdb)
     sasa_prompt[span_start:span_end] = [40.0]*(span_end - span_start)
     protein_prompt = ESMProtein(sequence="_"*len(pdb), coordinates=structure_prompt, sasa=sasa_prompt)
+    model = get_model(model_name, HF_TOKEN)
     generated_proteins = []
     for i in range(n_samples):
     fn=scaffold,
     inputs=[
         gr.Dropdown(label="Model Name", choices=["esm3_sm_open_v1"], value="esm3_sm_open_v1", allow_custom_value=True),
+        # gr.Textbox(value = "hf_...", label="Hugging Face Token", type="password"),
         gr.Textbox(value="1ITU", label = "PDB Code"),
         gr.Textbox(value="A", label = "Chain"),
         gr.Number(value=123, label="Motif Start"),
     fn=ss_edit,
     inputs=[
         gr.Dropdown(label="Model Name", choices=["esm3_sm_open_v1"], value="esm3_sm_open_v1", allow_custom_value=True),
+        # gr.Textbox(value = "hf_...", label="Hugging Face Token", type="password"),
         gr.Textbox(value = "7XBQ", label="PDB ID"),
         gr.Textbox(value = "A", label="Chain ID"),
         gr.Number(value=38, label="Edit Region Start"),
     fn=sasa_edit,
     inputs=[
         gr.Dropdown(label="Model Name", choices=["esm3_sm_open_v1"], value="esm3_sm_open_v1", allow_custom_value=True),
+        # gr.Textbox(value = "hf_...", label="Hugging Face Token", type="password"),
         gr.Textbox(value = "1LBS", label="PDB ID"),
         gr.Textbox(value = "A", label="Chain ID"),
         gr.Number(value=105, label="Span Start"),
         gr.Number(value=116, label="Span End"),
+        # gr.Textbox(value="...", label="SS8 String")
         gr.Number(value=1, label="Number of Samples")
     ],
     outputs = [
             - GitHub: https://github.com/evolutionaryscale/esm
             - HuggingFace Model: https://huggingface.co/EvolutionaryScale/esm3-sm-open-v1
+            Spaces App By: [[Colby T. Ford](https://colbyford.com)] from [Tuple, The Cloud Genomics Company](https://tuple.xyz)
+            NOTE: You will need to agree to EvolutionaryScale's [license agreement](https://huggingface.co/EvolutionaryScale/esm3-sm-open-v1) to use the model. This space uses a stored token for API access.
             """
         )
     with gr.Row():
             ])
 if __name__ == "__main__":
+    esm_app.launch(mcp_server=True)