burtenshaw committed on
Commit
17ce9a8
·
1 Parent(s): 3372a22

pass args to generation pipeline

Browse files
Files changed (1) hide show
  1. app/app.py +7 -5
app/app.py CHANGED
@@ -13,7 +13,7 @@ from feedback import save_feedback, scheduler
13
  from gradio.components.chatbot import Option
14
  from huggingface_hub import InferenceClient
15
  from pandas import DataFrame
16
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
17
 
18
 
19
  LANGUAGES: dict[str, str] = {
@@ -58,9 +58,7 @@ def create_inference_client(
58
  """
59
  if ZERO_GPU:
60
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
61
- model = AutoModelForCausalLM.from_pretrained(
62
- BASE_MODEL, load_in_8bit=True, max_new_tokens=2000
63
- )
64
  return pipeline(
65
  "text-generation",
66
  model=model,
@@ -198,7 +196,11 @@ def add_fake_like_data(
198
 
199
  @spaces.GPU
200
  def call_pipeline(messages: list, language: str):
201
- response = CLIENT(messages)
 
 
 
 
202
  content = response[0]["generated_text"][-1]["content"]
203
  return content
204
 
 
13
  from gradio.components.chatbot import Option
14
  from huggingface_hub import InferenceClient
15
  from pandas import DataFrame
16
+ from transformers import pipeline, AutoTokenizer, CohereForCausalLM
17
 
18
 
19
  LANGUAGES: dict[str, str] = {
 
58
  """
59
  if ZERO_GPU:
60
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
61
+ model = CohereForCausalLM.from_pretrained(BASE_MODEL, load_in_8bit=True)
 
 
62
  return pipeline(
63
  "text-generation",
64
  model=model,
 
196
 
197
  @spaces.GPU
198
  def call_pipeline(messages: list, language: str):
199
+ response = CLIENT(
200
+ messages,
201
+ clean_up_tokenization_spaces=False,
202
+ generate_kwargs={"max_length": 2000},
203
+ )
204
  content = response[0]["generated_text"][-1]["content"]
205
  return content
206