Update longchat_instructions.py
longchat_instructions.py  +13 -6
longchat_instructions.py
CHANGED
@@ -25,10 +25,13 @@ model, tokenizer = FastLanguageModel.from_pretrained(
 FastLanguageModel.for_inference(model)
 
 
-# Function to handle the chat loop
+# Function to handle the chat loop with memory
+
 def chat():
     print("Chat with the model! Type '\\q' or 'quit' to stop.\n")
 
+    chat_history = "" # Store the conversation history
+
     while True:
         # Get user input
         user_input = input("You: ")
@@ -38,15 +41,16 @@ def chat():
             print("\nExiting the chat. Goodbye!")
             break
 
-        #
+        # Append the current input to chat history with instruction formatting
         prompt = alpaca_prompt.format(
            instruction="Please answer the following medical question.",
            input_text=user_input,
            output=""
         )
+        chat_history += prompt + "\n"
 
-        # Tokenize
-        inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
+        # Tokenize combined history and move to GPU
+        inputs = tokenizer([chat_history], return_tensors="pt").to("cuda")
 
         # Generate output with configured parameters
         outputs = model.generate(
@@ -61,10 +65,13 @@ def chat():
 
         # Decode and clean the model's response
         decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-        clean_output = decoded_output[0].split('### Response:')[1].strip()
+        clean_output = decoded_output[0].split('### Response:')[-1].strip()
+
+        # Add the response to chat history
+        chat_history += f": {clean_output}\n"
 
         # Display the response
         print(f"\nModel: {clean_output}\n")
 
 # Start the chat
-chat()
+chat()
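For reference, here is the updated chat() loop from the right-hand side of the diff, reassembled into a single runnable sketch. Everything outside the three hunks is an assumption, not code taken from the repository: the checkpoint name, max_seq_length, the exact wording of the alpaca_prompt template (only its keyword names are confirmed by the .format() call in the diff), the '\q'/'quit' exit check, and the generation parameters are placeholders.

from unsloth import FastLanguageModel

# Assumed setup (not part of the diff hunks); any Unsloth checkpoint works here.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/llama-3-8b-bnb-4bit",  # placeholder checkpoint
    max_seq_length=2048,                       # placeholder context length
    load_in_4bit=True,
)
FastLanguageModel.for_inference(model)

# Assumed Alpaca-style template; the placeholder names match the .format() call in the diff.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Input:
{input_text}

### Response:
{output}"""

# Function to handle the chat loop with memory
def chat():
    print("Chat with the model! Type '\\q' or 'quit' to stop.\n")

    chat_history = ""  # Store the conversation history

    while True:
        # Get user input
        user_input = input("You: ")

        # Exit check (this part sits between the hunks, so the exact condition is assumed)
        if user_input.strip().lower() in ("\\q", "quit"):
            print("\nExiting the chat. Goodbye!")
            break

        # Append the current input to chat history with instruction formatting
        prompt = alpaca_prompt.format(
            instruction="Please answer the following medical question.",
            input_text=user_input,
            output=""
        )
        chat_history += prompt + "\n"

        # Tokenize combined history and move to GPU
        inputs = tokenizer([chat_history], return_tensors="pt").to("cuda")

        # Generate output with configured parameters (values here are placeholders)
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            use_cache=True,
        )

        # Decode and keep only the text after the last "### Response:" marker
        decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        clean_output = decoded_output[0].split('### Response:')[-1].strip()

        # Add the response to chat history
        chat_history += f": {clean_output}\n"

        # Display the response
        print(f"\nModel: {clean_output}\n")

# Start the chat
chat()

Two details worth noting. The split index changes from [1] to [-1] because chat_history now accumulates one "### Response:" marker per turn, so only the text after the last marker is the newly generated answer. Also, chat_history grows without bound, so long sessions will eventually exceed the model's max_seq_length unless older turns are trimmed.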