from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import torch

# Use the GPU when available. NOTE: the model AND every generation input
# must live on the same device, or torch.cat / generate will raise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(
    "Norod78/hebrew-gpt_neo-xl", trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    "Norod78/hebrew-gpt_neo-xl", pad_token_id=tokenizer.eos_token_id
)
model.to(device)


def chat(message, history=None):
    """Generate one bot reply and append the turn to the conversation state.

    Args:
        message: The user's new message (plain text).
        history: Conversation state — a list of
            ``(user_message, bot_response, chat_history_ids)`` triples, where
            ``chat_history_ids`` is the full token-id tensor of the dialog so
            far. ``None`` starts a fresh conversation.

    Returns:
        A ``(display_pairs, history, topic_label)`` triple:
        ``display_pairs`` is a list of ``(user, bot)`` text pairs for the
        Chatbot widget, ``history`` is the updated state, and ``topic_label``
        is the (placeholder) topic of the latest message.
    """
    # BUG FIX: the original used a mutable default (`history=[]`), which is
    # shared across calls — one user's conversation would bleed into another's.
    if history is None:
        history = []

    # Prefix the message with EOS so the model sees a turn separator.
    # BUG FIX: move the input ids to the model's device; the original left
    # them on CPU, which fails when the model is on CUDA.
    new_user_input_ids = tokenizer.encode(
        tokenizer.eos_token + message, return_tensors='pt'
    ).to(device)

    if history:
        # Continue generation from the accumulated token ids of the last turn.
        last_set_of_ids = history[-1][2]
        bot_input_ids = torch.cat([last_set_of_ids, new_user_input_ids], dim=-1)
    else:
        bot_input_ids = new_user_input_ids

    # Greedy generation; max_length bounds the TOTAL dialog length in tokens.
    chat_history_ids = model.generate(
        bot_input_ids, max_length=5000, pad_token_id=tokenizer.eos_token_id
    )

    # The reply is everything generated beyond the prompt.
    response_ids = chat_history_ids[:, bot_input_ids.shape[-1]:][0]
    response = tokenizer.decode(response_ids, skip_special_tokens=True)

    history.append((message, response, chat_history_ids))

    # BUG FIX: gr.Chatbot expects (user, bot) PAIRS; the original passed the
    # raw triples (including the tensor), which the widget cannot render.
    display_pairs = [(user, bot) for user, bot, _ in history]
    return display_pairs, history, topic(message)


def topic(text):
    """Placeholder topic classifier — always returns the literal 'topic'.

    TODO: replace with a real classifier for *text*.
    """
    return 'topic'


title = "ArikGPT"
description = "Trained on a database of chats, jokes and news in the Hebrew language"
article = "Based on [EleutherAI's gpt-neo](https://github.com/EleutherAI/gpt-neo) 1.37B params"
examples = [
    ["מה אומר?"],
    ["מה שוקל יותר, טון נוצות או טון זהב?"],
]

demo = gr.Interface(
    chat,
    [gr.Textbox(label="Send messages here"), "state"],
    [gr.Chatbot(label='Conversation'), "state", gr.Textbox(label="Topic", lines=1)],
    allow_screenshot=False,
    allow_flagging="never",
    title=title,
    description=description,
    article=article,
    examples=examples,
)

demo.launch()