Commit
•
e5c9ce6
1
Parent(s):
8c68191
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
|
4 |
-
from transformers import IteratorStreamer
|
5 |
import torch
|
6 |
from threading import Thread
|
7 |
from huggingface_hub import Repository
|
@@ -33,10 +32,7 @@ else:
|
|
33 |
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, device_map="auto")
|
34 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
35 |
|
36 |
-
prompt_template = f"""### Anweisung:
|
37 |
-
{{input}}
|
38 |
-
|
39 |
-
### Antwort:"""
|
40 |
|
41 |
|
42 |
def generate(instruction, temperature, max_new_tokens, top_p, length_penalty):
|
@@ -66,7 +62,7 @@ def generate(instruction, temperature, max_new_tokens, top_p, length_penalty):
|
|
66 |
# STREAMING BASED ON git+https://github.com/gante/transformers.git@streamer_iterator
|
67 |
|
68 |
# streaming
|
69 |
-
streamer = IteratorStreamer(tokenizer)
|
70 |
model_inputs = tokenizer(formatted_instruction, return_tensors="pt", truncation=True, max_length=2048)
|
71 |
# move to gpu
|
72 |
model_inputs = {k: v.to(device) for k, v in model_inputs.items()}
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TextIteratorStreamer
|
|
|
4 |
import torch
|
5 |
from threading import Thread
|
6 |
from huggingface_hub import Repository
|
|
|
32 |
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, device_map="auto")
|
33 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
34 |
|
35 |
+
prompt_template = f"### Anweisung:\n{{input}}\n\n### Antwort:"
|
|
|
|
|
|
|
36 |
|
37 |
|
38 |
def generate(instruction, temperature, max_new_tokens, top_p, length_penalty):
|
|
|
62 |
# STREAMING BASED ON git+https://github.com/gante/transformers.git@streamer_iterator
|
63 |
|
64 |
# streaming
|
65 |
+
streamer = TextIteratorStreamer(tokenizer)
|
66 |
model_inputs = tokenizer(formatted_instruction, return_tensors="pt", truncation=True, max_length=2048)
|
67 |
# move to gpu
|
68 |
model_inputs = {k: v.to(device) for k, v in model_inputs.items()}
|