Spaces:

ayyuce
/

NeoProtein-GPT

Running

App Files Files Community

ayyuce committed on Mar 22

Commit

1ae49ba

verified ·

1 Parent(s): 4f07e20

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -40

app.py CHANGED Viewed

@@ -1,47 +1,90 @@
 import streamlit as st
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, AutoConfig
-import json
-import os
-model_name = "ayyuce/NeoProtein-GPT"
-config_path = os.path.join(os.path.expanduser("~"), f".cache/huggingface/hub/models--{model_name.replace('/', '--')}/snapshots/d462becc43e0c3e4792cfa78efd029bed5dcfeb8/config.json")
-if not os.path.isfile(config_path):
-    config = {
-        "model_type": "gpt2",
-        "architectures": ["GPT2LMHeadModel"],
-        "vocab_size": 50257,
-        "n_positions": 1024,
-        "n_ctx": 1024,
-        "n_embd": 768,
-        "n_layer": 12,
-        "n_head": 12,
-        "activation_function": "gelu",
-        "initializer_range": 0.02,
-        "layer_norm_epsilon": 1e-5,
-        "bos_token_id": 50256,
-        "eos_token_id": 50256,
-    }
-    os.makedirs(os.path.dirname(config_path), exist_ok=True)
-    with open(config_path, "w") as f:
-        json.dump(config, f)
 @st.cache_resource(show_spinner=False)
-def load_generator():
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForCausalLM.from_pretrained(model_name, config=AutoConfig.from_pretrained(model_name))
-    gen_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
-    return gen_pipeline
-st.title("NeoProtein-GPT")
-st.write("Welcome to the NeoProtein-GPT interface. Enter a protein prompt and generate new protein sequences!")
-user_prompt = st.text_area("Enter your prompt", value="Design a novel protein sequence with a unique binding site:")
-if st.button("Generate Protein Sequence"):
-    with st.spinner("Generating sequence..."):
-        outputs = load_generator()(user_prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
-        generated_text = outputs[0]["generated_text"]
-    st.subheader("Generated Sequence:")
-    st.code(generated_text, language="python")

 import streamlit as st
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+st.set_page_config(page_title="NeoProtein Designer", page_icon="🧬")  # issued before any other st.* call in this script
+st.title("🧬 NeoProtein-GPT Protein Designer")
+st.markdown("""
+### Design novel protein sequences with unique binding sites
+*Using the [NeoProtein-GPT](https://huggingface.co/ayyuce/NeoProtein-GPT) model from Hugging Face*
+""")  # intro blurb linking to the model card
+with st.sidebar:  # widget values below become module-level globals read by generate_sequences()
+    st.header("Parameters")
+    binding_motif = st.text_input("Binding site motif (e.g., AXXC):", help="Use X for wildcard positions")  # interpolated into the generation prompt
+    seq_length = st.slider("Sequence length", 50, 500, 150)  # min 50, max 500, default 150; bounds tokens generated after the prompt. NOTE(review): tokens may not equal residues - confirm against the tokenizer
+    temperature = st.slider("Temperature (creativity)", 0.1, 2.0, 1.0)  # forwarded to model.generate() as the sampling temperature
+    num_sequences = st.slider("Number of sequences", 1, 5, 3)  # forwarded to model.generate() as num_return_sequences
 @st.cache_resource(show_spinner=False)
+def load_model():
+    model = AutoModelForCausalLM.from_pretrained(
+        "ayyuce/NeoProtein-GPT",
+        device_map="cpu",
+        torch_dtype=torch.float32
+    )
+    tokenizer = AutoTokenizer.from_pretrained("ayyuce/NeoProtein-GPT")
+    return model, tokenizer
+model, tokenizer = load_model()  # module-level handles used by generate_sequences(); load_model is wrapped in st.cache_resource
+def generate_sequences():
+    if not binding_motif:
+        st.error("Please enter a binding motif")
+        return
+    prompt = f"<start>BindingMotif:{binding_motif}<start>Seq:"
+    try:
+        inputs = tokenizer(prompt, return_tensors="pt")
+        input_length = inputs.input_ids.shape[1]
+        outputs = model.generate(
+            inputs.input_ids,
+            max_length=input_length + seq_length,
+            temperature=temperature,
+            do_sample=True,
+            top_k=50,
+            top_p=0.95,
+            num_return_sequences=num_sequences,
+            pad_token_id=tokenizer.eos_token_id
+        )
+        generated_sequences = [
+            tokenizer.decode(output[input_length:], skip_special_tokens=True)
+            for output in outputs
+        ]
+        return generated_sequences
+    except Exception as e:
+        st.error(f"Generation failed: {str(e)}")
+        return []
+if st.button("Generate Protein Sequences"):  # generation runs only when the button reports a click
+    with st.spinner("Designing novel proteins..."):
+        sequences = generate_sequences()  # a falsy result (validation or generation failure) skips rendering below
+    if sequences:
+        st.subheader("Generated Sequences")
+        for i, seq in enumerate(sequences):  # i is 0-based; shown to the user as i+1
+            st.markdown(f"""
+            **Sequence #{i+1}**
+            ```fasta
+            {seq}
+            ```
+            """)  # raw sequence inside a fenced block; no ">" header line is emitted
+            st.divider()
+st.markdown("""
+### How to use:
+1. Enter your target binding motif using single-letter amino acid codes
+2. Adjust parameters in the sidebar
+3. Click the generate button
+4. Results will appear in FASTA format
+**Example motifs:**
+- `GHXXXH` for histidine-rich motifs
+- `CXXC` for disulfide bond motifs
+- `DE` for acidic patches
+""")  # static usage help, rendered after the results on every run