Spaces:

Henrychur
/

MMedS-Llama-3-8B

Sleeping

Henrychur committed on Sep 4, 2024

Commit

40e2669

verified ·

1 Parent(s): 902df4f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -21,7 +21,7 @@ class MedS_Llama3:
         self.model.eval()
         print('Model and tokenizer loaded on CPU!')
-    def chat(self, query: str, instruction: str = "If you are a doctor, please perform clinical consulting with the patient.") -> str:
         input_sentence = f"{instruction}\n\n{query}"
         input_tokens = self.tokenizer(
             input_sentence,
@@ -32,7 +32,7 @@ class MedS_Llama3:
         output = self.model.generate(
             **input_tokens,
-            max_new_tokens=512,  # 降低生成的最大新tokens数目来节省内存
             eos_token_id=128009
         )
@@ -48,17 +48,18 @@ model_path = "Henrychur/MMedS-Llama-3-8B"  # 确保这里是模型的正确路
 chat_model = MedS_Llama3(model_path)
 # Response function used by the Gradio interface.
 # Forwards the user message and the system message to chat_model.chat and
 # yields the returned response once.
-def respond(message, system_message):
     # No conversation history is passed along: only the current input and
     # the system instruction are sent to the model.
-    response = chat_model.chat(query=message, instruction=system_message)
     yield response
 # Set up the Gradio interface: `respond` is the handler, the inputs are two
 # text boxes (the second one pre-filled with the system message), and the
 # output is rendered as plain text.
 demo = gr.Interface(
     fn=respond,
     inputs=[
-        gr.Textbox(label="What is the treatment for diabetes?"),
-        gr.Textbox(value="If you are a doctor, please perform clinical consulting with the patient.", label="System message")
     ],
     outputs="text"
 )

         self.model.eval()
         print('Model and tokenizer loaded on CPU!')
+    def chat(self, query: str, instruction: str, max_output_tokens: int) -> str:
         input_sentence = f"{instruction}\n\n{query}"
         input_tokens = self.tokenizer(
             input_sentence,
         output = self.model.generate(
             **input_tokens,
+            max_new_tokens=max_output_tokens,
             eos_token_id=128009
         )
 chat_model = MedS_Llama3(model_path)
 # Response function used by the Gradio interface.
 # Forwards the user message, the system message and the output-token limit
 # to chat_model.chat and yields the returned response once.
+def respond(message, system_message, max_output_tokens):
     # No conversation history is passed along: only the current input and
     # the system instruction are sent to the model.
+    response = chat_model.chat(query=message, instruction=system_message, max_output_tokens=max_output_tokens)
     yield response
 # Set up the Gradio interface: `respond` is the handler, the inputs are a
 # text box for the user input, a text box pre-filled with the system message,
 # and a slider (minimum=1, maximum=1024, value=512) for the output-token
 # limit; the output is rendered as plain text.
 demo = gr.Interface(
     fn=respond,
     inputs=[
+        gr.Textbox(label="Your Input"),
+        gr.Textbox(value="If you are a doctor, please perform clinical consulting with the patient.", label="System message"),
+        gr.Slider(minimum=1, maximum=1024, value=512, step=1, label="Max Output Tokens")
     ],
     outputs="text"
 )