from model import tokenizer, model

# Global chat transcript, stored as alternating "User: ..." / "Assistant: ..." lines.
HISTORY = []


def chat(message, history):
    """Generate an assistant reply to *message* and record the exchange.

    Args:
        message: The latest user utterance.
        history: List of prior transcript lines ("User: ..." / "Assistant: ...")
            used to build the prompt.
            NOTE(review): despite taking *history* as a parameter, this function
            appends the new turn to the module-level HISTORY, not to the list
            passed in — confirm callers expect that.

    Returns:
        The decoded assistant reply (special tokens stripped).
    """
    # Build the prompt from the prior transcript plus the new user turn.
    conversation = "\n".join(history) + f"\nUser: {message}\nAssistant:"

    # Truncate the prompt to the model's context budget and move tensors to
    # wherever the model actually lives (was hard-coded "cuda", which crashes
    # on CPU-only hosts).
    inputs = tokenizer(
        conversation,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    ).to(model.device)

    # max_new_tokens bounds only the *generated* tokens. The original used
    # max_length=1024, which counts prompt + output — a full-length prompt
    # would leave no room to generate anything. Passing attention_mask avoids
    # incorrect results (and HF warnings) when padding is present.
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=1024,
        temperature=0.7,
        do_sample=True,
    )

    # Slice off the prompt tokens so only the newly generated reply is decoded.
    reply = tokenizer.decode(
        outputs[:, inputs.input_ids.shape[-1]:][0],
        skip_special_tokens=True,
    )

    update_history(message, reply)
    return reply


def update_history(message, reply):
    """Append the latest user message and assistant reply to the global HISTORY."""
    global HISTORY
    HISTORY.append(f"User: {message}")
    HISTORY.append(f"Assistant: {reply}")


def get_history():
    """Return the full chat transcript as a newline-joined string."""
    return "\n".join(HISTORY)