ykallan committed on
Commit
6459c4b
·
verified ·
1 Parent(s): 9ad4510

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -17
app.py CHANGED
@@ -1,38 +1,45 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
- from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
  """
6
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
  """
8
 
9
  pretrained_model = "ykallan/SkuInfo-Qwen2.5-3B-Instruct"
10
- model = AutoModelForCausalLM.from_pretrained(pretrained_model)
11
- tokenizer = AutoTokenizer.from_pretrained(pretrained_model)
12
 
 
13
 
14
  def respond(
15
  message,
 
 
16
  max_tokens,
17
  temperature,
18
  top_p,
19
  ):
20
- messages = [{"role": "system", "content": "在以下商品名称中抽取出品牌、型号、主商品,并以JSON格式返回。"}]
 
 
 
 
 
 
21
 
22
  messages.append({"role": "user", "content": message})
23
- input_ids = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
24
- model_inputs = tokenizer([input_ids], return_tensors="pt", padding=True)
25
-
26
- generate_config = {
27
- "max_new_tokens": 128
28
- }
29
-
30
- generated_ids = model.generate(model_inputs.input_ids, **generate_config)
31
- generated_ids = [
32
- output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
33
- ]
34
- response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
35
- return response
 
36
 
37
 
38
  """
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
 
8
  pretrained_model = "ykallan/SkuInfo-Qwen2.5-3B-Instruct"
 
 
9
 
10
+ client = InferenceClient(pretrained_model)
11
 
12
  def respond(
13
  message,
14
+ history: list[tuple[str, str]],
15
+ system_message,
16
  max_tokens,
17
  temperature,
18
  top_p,
19
  ):
20
+ messages = [{"role": "system", "content": system_message}]
21
+
22
+ for val in history:
23
+ if val[0]:
24
+ messages.append({"role": "user", "content": val[0]})
25
+ if val[1]:
26
+ messages.append({"role": "assistant", "content": val[1]})
27
 
28
  messages.append({"role": "user", "content": message})
29
+
30
+ response = ""
31
+
32
+ for message in client.chat_completion(
33
+ messages,
34
+ max_tokens=max_tokens,
35
+ stream=True,
36
+ temperature=temperature,
37
+ top_p=top_p,
38
+ ):
39
+ token = message.choices[0].delta.content
40
+
41
+ response += token
42
+ yield response
43
 
44
 
45
  """