from model import tokenizer, model

# Global chat transcript, stored as alternating "User: ..." / "Assistant: ..." lines.
HISTORY = []


def chat(message, history):
    """Generate an assistant reply to *message* and record the exchange.

    Args:
        message: The latest user utterance.
        history: List of prior transcript lines ("User: ..." / "Assistant: ...")
            used to build the prompt.
            NOTE(review): despite taking *history* as a parameter, this function
            appends the new turn to the module-level HISTORY, not to the list
            passed in — confirm callers expect that.

    Returns:
        The decoded assistant reply (special tokens stripped).
    """
    # Build the prompt from the prior transcript plus the new user turn.
    conversation = "\n".join(history) + f"\nUser: {message}\nAssistant:"

    # Truncate the prompt to the model's context budget and move tensors to
    # wherever the model actually lives (was hard-coded "cuda", which crashes
    # on CPU-only hosts).
    inputs = tokenizer(
        conversation,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    ).to(model.device)

    # max_new_tokens bounds only the *generated* tokens. The original used
    # max_length=1024, which counts prompt + output — a full-length prompt
    # would leave no room to generate anything. Passing attention_mask avoids
    # incorrect results (and HF warnings) when padding is present.
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=1024,
        temperature=0.7,
        do_sample=True,
    )

    # Slice off the prompt tokens so only the newly generated reply is decoded.
    reply = tokenizer.decode(
        outputs[:, inputs.input_ids.shape[-1]:][0],
        skip_special_tokens=True,
    )

    update_history(message, reply)
    return reply


def update_history(message, reply):
    """Append the latest user message and assistant reply to the global HISTORY."""
    global HISTORY
    HISTORY.append(f"User: {message}")
    HISTORY.append(f"Assistant: {reply}")


def get_history():
    """Return the full chat transcript as a newline-joined string."""
    return "\n".join(HISTORY)