philschmid HF staff committed on
Commit
e5c9ce6
1 Parent(s): 8c68191

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -7
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import os
2
  import gradio as gr
3
- from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
4
- from transformers import IteratorStreamer
5
  import torch
6
  from threading import Thread
7
  from huggingface_hub import Repository
@@ -33,10 +32,7 @@ else:
33
  model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, device_map="auto")
34
  tokenizer = AutoTokenizer.from_pretrained(model_id)
35
 
36
- prompt_template = f"""### Anweisung:
37
- {{input}}
38
-
39
- ### Antwort:"""
40
 
41
 
42
  def generate(instruction, temperature, max_new_tokens, top_p, length_penalty):
@@ -66,7 +62,7 @@ def generate(instruction, temperature, max_new_tokens, top_p, length_penalty):
66
  # STREAMING BASED ON git+https://github.com/gante/transformers.git@streamer_iterator
67
 
68
  # streaming
69
- streamer = IteratorStreamer(tokenizer)
70
  model_inputs = tokenizer(formatted_instruction, return_tensors="pt", truncation=True, max_length=2048)
71
  # move to gpu
72
  model_inputs = {k: v.to(device) for k, v in model_inputs.items()}
 
1
  import os
2
  import gradio as gr
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TextIteratorStreamer
 
4
  import torch
5
  from threading import Thread
6
  from huggingface_hub import Repository
 
32
  model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch_dtype, device_map="auto")
33
  tokenizer = AutoTokenizer.from_pretrained(model_id)
34
 
35
+ prompt_template = f"### Anweisung:\n{{input}}\n\n### Antwort:"
 
 
 
36
 
37
 
38
  def generate(instruction, temperature, max_new_tokens, top_p, length_penalty):
 
62
  # STREAMING BASED ON git+https://github.com/gante/transformers.git@streamer_iterator
63
 
64
  # streaming
65
+ streamer = TextIteratorStreamer(tokenizer)
66
  model_inputs = tokenizer(formatted_instruction, return_tensors="pt", truncation=True, max_length=2048)
67
  # move to gpu
68
  model_inputs = {k: v.to(device) for k, v in model_inputs.items()}