Update longchat_instructions.py
longchat_instructions.py  +13 -6
longchat_instructions.py
CHANGED
@@ -25,10 +25,13 @@ model, tokenizer = FastLanguageModel.from_pretrained(
 FastLanguageModel.for_inference(model)
 
 
-# Function to handle the chat loop
+# Function to handle the chat loop with memory
+
 def chat():
     print("Chat with the model! Type '\\q' or 'quit' to stop.\n")
 
+    chat_history = "" # Store the conversation history
+
     while True:
         # Get user input
         user_input = input("You: ")
@@ -38,15 +41,16 @@ def chat():
             print("\nExiting the chat. Goodbye!")
             break
 
-        #
+        # Append the current input to chat history with instruction formatting
         prompt = alpaca_prompt.format(
            instruction="Please answer the following medical question.",
            input_text=user_input,
            output=""
         )
+        chat_history += prompt + "\n"
 
-        # Tokenize
-        inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
+        # Tokenize combined history and move to GPU
+        inputs = tokenizer([chat_history], return_tensors="pt").to("cuda")
 
         # Generate output with configured parameters
         outputs = model.generate(
@@ -61,10 +65,13 @@ def chat():
 
         # Decode and clean the model's response
         decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-        clean_output = decoded_output[0].split('### Response:')[1].strip()
+        clean_output = decoded_output[0].split('### Response:')[-1].strip()
+
+        # Add the response to chat history
+        chat_history += f": {clean_output}\n"
 
         # Display the response
         print(f"\nModel: {clean_output}\n")
 
 # Start the chat
-chat()
+chat()
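For reference, here is the updated chat() loop from the right-hand side of the diff, reassembled into a single runnable sketch. Everything outside the three hunks is an assumption, not code taken from the repository: the checkpoint name, max_seq_length, the exact wording of the alpaca_prompt template (only its keyword names are confirmed by the .format() call in the diff), the '\q'/'quit' exit check, and the generation parameters are placeholders.

from unsloth import FastLanguageModel

# Assumed setup (not part of the diff hunks); any Unsloth checkpoint works here.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-3-8b-bnb-4bit",  # placeholder checkpoint
    max_seq_length=2048,                       # placeholder context length
    load_in_4bit=True,
)
FastLanguageModel.for_inference(model)

# Assumed Alpaca-style template; the placeholder names match the .format() call in the diff.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input_text}

### Response:
{output}"""

# Function to handle the chat loop with memory
def chat():
    print("Chat with the model! Type '\\q' or 'quit' to stop.\n")

    chat_history = ""  # Store the conversation history

    while True:
        # Get user input
        user_input = input("You: ")

        # Exit check (this part sits between the hunks, so the exact condition is assumed)
        if user_input.strip().lower() in ("\\q", "quit"):
            print("\nExiting the chat. Goodbye!")
            break

        # Append the current input to chat history with instruction formatting
        prompt = alpaca_prompt.format(
            instruction="Please answer the following medical question.",
            input_text=user_input,
            output=""
        )
        chat_history += prompt + "\n"

        # Tokenize combined history and move to GPU
        inputs = tokenizer([chat_history], return_tensors="pt").to("cuda")

        # Generate output with configured parameters (values here are placeholders)
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            use_cache=True,
        )

        # Decode and keep only the text after the last "### Response:" marker
        decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        clean_output = decoded_output[0].split('### Response:')[-1].strip()

        # Add the response to chat history
        chat_history += f": {clean_output}\n"

        # Display the response
        print(f"\nModel: {clean_output}\n")

# Start the chat
chat()

Two details worth noting. The split index changes from [1] to [-1] because chat_history now accumulates one "### Response:" marker per turn, so only the text after the last marker is the newly generated answer. Also, chat_history grows without bound, so long sessions will eventually exceed the model's max_seq_length unless older turns are trimmed.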