ayyuce committed on
Commit
4f07e20
·
verified ·
1 Parent(s): 115a34b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -5
app.py CHANGED
@@ -1,18 +1,42 @@
1
  import streamlit as st
2
- from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  @st.cache_resource(show_spinner=False)
5
  def load_generator():
6
- model_name = "ayyuce/NeoProtein-GPT"
7
- config = AutoConfig.from_pretrained(model_name, model_type="gpt2")
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
9
- model = AutoModelForCausalLM.from_pretrained(model_name, config=config, device_map="cpu")
10
  gen_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
11
  return gen_pipeline
12
 
13
  st.title("NeoProtein-GPT")
14
  st.write("Welcome to the NeoProtein-GPT interface. Enter a protein prompt and generate new protein sequences!")
15
 
 
16
  user_prompt = st.text_area("Enter your prompt", value="Design a novel protein sequence with a unique binding site:")
17
 
18
  if st.button("Generate Protein Sequence"):
@@ -20,4 +44,4 @@ if st.button("Generate Protein Sequence"):
20
  outputs = load_generator()(user_prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
21
  generated_text = outputs[0]["generated_text"]
22
  st.subheader("Generated Sequence:")
23
- st.code(generated_text, language="python")
 
1
  import streamlit as st
2
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, AutoConfig
3
+ import json
4
+ import os
5
+
6
+ model_name = "ayyuce/NeoProtein-GPT"
7
+ config_path = os.path.join(os.path.expanduser("~"), f".cache/huggingface/hub/models--{model_name.replace('/', '--')}/snapshots/d462becc43e0c3e4792cfa78efd029bed5dcfeb8/config.json")
8
+
9
+ if not os.path.isfile(config_path):
10
+ config = {
11
+ "model_type": "gpt2",
12
+ "architectures": ["GPT2LMHeadModel"],
13
+ "vocab_size": 50257,
14
+ "n_positions": 1024,
15
+ "n_ctx": 1024,
16
+ "n_embd": 768,
17
+ "n_layer": 12,
18
+ "n_head": 12,
19
+ "activation_function": "gelu",
20
+ "initializer_range": 0.02,
21
+ "layer_norm_epsilon": 1e-5,
22
+ "bos_token_id": 50256,
23
+ "eos_token_id": 50256,
24
+ }
25
+ os.makedirs(os.path.dirname(config_path), exist_ok=True)
26
+ with open(config_path, "w") as f:
27
+ json.dump(config, f)
28
 
29
  @st.cache_resource(show_spinner=False)
30
  def load_generator():
 
 
31
  tokenizer = AutoTokenizer.from_pretrained(model_name)
32
+ model = AutoModelForCausalLM.from_pretrained(model_name, config=AutoConfig.from_pretrained(model_name))
33
  gen_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
34
  return gen_pipeline
35
 
36
  st.title("NeoProtein-GPT")
37
  st.write("Welcome to the NeoProtein-GPT interface. Enter a protein prompt and generate new protein sequences!")
38
 
39
+
40
  user_prompt = st.text_area("Enter your prompt", value="Design a novel protein sequence with a unique binding site:")
41
 
42
  if st.button("Generate Protein Sequence"):
 
44
  outputs = load_generator()(user_prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
45
  generated_text = outputs[0]["generated_text"]
46
  st.subheader("Generated Sequence:")
47
+ st.code(generated_text, language="python")