Henrychur committed on
Commit
40e2669
·
verified ·
1 Parent(s): 902df4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -21,7 +21,7 @@ class MedS_Llama3:
21
  self.model.eval()
22
  print('Model and tokenizer loaded on CPU!')
23
 
24
- def chat(self, query: str, instruction: str = "If you are a doctor, please perform clinical consulting with the patient.") -> str:
25
  input_sentence = f"{instruction}\n\n{query}"
26
  input_tokens = self.tokenizer(
27
  input_sentence,
@@ -32,7 +32,7 @@ class MedS_Llama3:
32
 
33
  output = self.model.generate(
34
  **input_tokens,
35
- max_new_tokens=512, # 降低生成的最大新tokens数目来节省内存
36
  eos_token_id=128009
37
  )
38
 
@@ -48,17 +48,18 @@ model_path = "Henrychur/MMedS-Llama-3-8B" # 确保这里是模型的正确路
48
  chat_model = MedS_Llama3(model_path)
49
 
50
  # 定义 Gradio 接口中使用的响应函数
51
- def respond(message, system_message):
52
  # 每次对话结束后清空历史,只使用当前输入和系统指令
53
- response = chat_model.chat(query=message, instruction=system_message)
54
  yield response
55
 
56
  # 设置 Gradio 聊天界面
57
  demo = gr.Interface(
58
  fn=respond,
59
  inputs=[
60
- gr.Textbox(label="What is the treatment for diabetes?"),
61
- gr.Textbox(value="If you are a doctor, please perform clinical consulting with the patient.", label="System message")
 
62
  ],
63
  outputs="text"
64
  )
 
21
  self.model.eval()
22
  print('Model and tokenizer loaded on CPU!')
23
 
24
+ def chat(self, query: str, instruction: str, max_output_tokens: int) -> str:
25
  input_sentence = f"{instruction}\n\n{query}"
26
  input_tokens = self.tokenizer(
27
  input_sentence,
 
32
 
33
  output = self.model.generate(
34
  **input_tokens,
35
+ max_new_tokens=max_output_tokens,
36
  eos_token_id=128009
37
  )
38
 
 
48
  chat_model = MedS_Llama3(model_path)
49
 
50
  # 定义 Gradio 接口中使用的响应函数
51
+ def respond(message, system_message, max_output_tokens):
52
  # 每次对话结束后清空历史,只使用当前输入和系统指令
53
+ response = chat_model.chat(query=message, instruction=system_message, max_output_tokens=max_output_tokens)
54
  yield response
55
 
56
  # 设置 Gradio 聊天界面
57
  demo = gr.Interface(
58
  fn=respond,
59
  inputs=[
60
+ gr.Textbox(label="Your Input"),
61
+ gr.Textbox(value="If you are a doctor, please perform clinical consulting with the patient.", label="System message"),
62
+ gr.Slider(minimum=1, maximum=1024, value=512, step=1, label="Max Output Tokens")
63
  ],
64
  outputs="text"
65
  )