Subh775 committed
Commit aff1b1a · verified · 1 Parent(s): 128d10c

Update longchat_instructions.py

Files changed (1):
  longchat_instructions.py +13 -6
longchat_instructions.py CHANGED
@@ -25,10 +25,13 @@ model, tokenizer = FastLanguageModel.from_pretrained(
 FastLanguageModel.for_inference(model)
 
 
-# Function to handle the chat loop
+# Function to handle the chat loop with memory
+
 def chat():
     print("Chat with the model! Type '\\q' or 'quit' to stop.\n")
 
+    chat_history = ""  # Store the conversation history
+
     while True:
         # Get user input
         user_input = input("You: ")
@@ -38,15 +41,16 @@ def chat():
             print("\nExiting the chat. Goodbye!")
             break
 
-        # Prepare prompt with user input
+        # Append the current input to chat history with instruction formatting
         prompt = alpaca_prompt.format(
            instruction="Please answer the following medical question.",
            input_text=user_input,
            output=""
         )
+        chat_history += prompt + "\n"
 
-        # Tokenize input and move to GPU
-        inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
+        # Tokenize combined history and move to GPU
+        inputs = tokenizer([chat_history], return_tensors="pt").to("cuda")
 
         # Generate output with configured parameters
         outputs = model.generate(
@@ -61,10 +65,13 @@ def chat():
 
         # Decode and clean the model's response
         decoded_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-        clean_output = decoded_output[0].split('### Response:')[1].strip()
+        clean_output = decoded_output[0].split('### Response:')[-1].strip()
+
+        # Add the response to chat history
+        chat_history += f": {clean_output}\n"
 
         # Display the response
         print(f"\nModel: {clean_output}\n")
 
 # Start the chat
-chat()
+chat()
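
For readers following the change: the hunks above assume an alpaca_prompt template and an already-loaded model/tokenizer defined earlier in longchat_instructions.py, neither of which appears in this diff. The sketch below is a GPU-free illustration of the new memory mechanism under those assumptions: each turn's Alpaca-formatted prompt is appended to chat_history, the whole history is fed back to the model, and the reply is split off after the last '### Response:' marker (which is why the index moves from [1] to [-1]). The template text and fake_generate() here are placeholders for illustration, not the repository's actual definitions.

# Minimal sketch of the memory mechanism added by this commit.
# The template string and fake_generate() are assumptions; the real script
# runs FastLanguageModel + tokenizer on CUDA with configured generate() args.
alpaca_prompt = (
    "### Instruction:\n{instruction}\n\n"
    "### Input:\n{input_text}\n\n"
    "### Response:\n{output}"
)

def fake_generate(prompt: str) -> str:
    # Stand-in for tokenize -> model.generate -> batch_decode: a causal LM
    # echoes the prompt and appends its continuation.
    return prompt + " (model reply)"

chat_history = ""  # grows across turns, exactly as in the diff
for user_input in ["What causes anemia?", "How is it treated?"]:
    prompt = alpaca_prompt.format(
        instruction="Please answer the following medical question.",
        input_text=user_input,
        output="",
    )
    chat_history += prompt + "\n"
    decoded = fake_generate(chat_history)  # the full history is re-sent every turn
    # With several prompts in the history there are several '### Response:'
    # markers, so [-1] (rather than [1]) picks the newest answer.
    clean_output = decoded.split("### Response:")[-1].strip()
    chat_history += f": {clean_output}\n"
    print(clean_output)

One consequence of this design is that chat_history is never truncated, so the prompt grows with every turn and a long session will eventually exceed the model's maximum sequence length; some form of history trimming would be needed for extended conversations.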