import gradio as gr
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    AutoModelForCausalLM,
    pipeline
)

# CHAT MODEL
chat_model_id = "MaziyarPanahi/Llama-3.2-3B-Instruct-GGUF"
chat_gguf = "Llama-3.2-3B-Instruct.Q4_K_M.gguf"

tokenizer = AutoTokenizer.from_pretrained(chat_model_id, gguf_file=chat_gguf)
model = AutoModelForCausalLM.from_pretrained(chat_model_id, gguf_file=chat_gguf)

chat_pipeline = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    do_sample=True,
    temperature=0.5,
    truncation=True,
    max_length=512,
    return_full_text=False
)

# TRANSLATION MODELS
fw_modelcard = "amurienne/gallek-m2m100"  # forward: French -> Breton
bw_modelcard = "amurienne/kellag-m2m100"  # backward: Breton -> French

fw_model = AutoModelForSeq2SeqLM.from_pretrained(fw_modelcard)
fw_tokenizer = AutoTokenizer.from_pretrained(fw_modelcard)

fw_translation_pipeline = pipeline(
    "translation",
    model=fw_model,
    tokenizer=fw_tokenizer,
    src_lang='fr',
    tgt_lang='br',
    max_length=400,
    device="cpu"
)

bw_model = AutoModelForSeq2SeqLM.from_pretrained(bw_modelcard)
bw_tokenizer = AutoTokenizer.from_pretrained(bw_modelcard)

bw_translation_pipeline = pipeline(
    "translation",
    model=bw_model,
    tokenizer=bw_tokenizer,
    src_lang='br',
    tgt_lang='fr',
    max_length=400,
    device="cpu"
)

# translation function
def translate(text, forward: bool):
    """Translate French -> Breton (forward=True) or Breton -> French (forward=False).

    The leading strings are the task prefixes the respective models expect.
    """
    if forward:
        return fw_translation_pipeline("traduis de français en breton: " + text)[0]['translation_text']
    else:
        return bw_translation_pipeline("treiñ eus ar galleg d'ar brezhoneg: " + text)[0]['translation_text']

# answer function
def answer(text):
    return chat_pipeline(text, chat_template=None)[0]['generated_text']

def format_prompt_with_history(message, native_chat_history):
    """Build a Llama-3-style prompt from the French-language history plus the new message."""
    # format the conversation history
    prompt = ""
    for interaction in native_chat_history:
        prompt += f"<|start_header_id|>{interaction['role']}<|end_header_id|>\n{interaction['content']}<|eot_id|>\n"

    # add the current user message, prefixed by a French instruction
    # ("You are a French-speaking assistant. Answer in a single sentence without formatting.")
    prompt += (
        "<|start_header_id|>user<|end_header_id|>\n"
        "Tu es un assistant francophone. Réponds en une seule phrase sans formatage.\n"
        f"{message}<|eot_id|>\n"
        "<|start_header_id|>assistant<|end_header_id|>\n"
    )
    return prompt

# maximum number of interactions to keep in history
max_history_length = 3

# keep a hidden model "native" (French) chat history
native_chat_history = []

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    chatbot = gr.Chatbot(label="Breton Chatbot (Translation based)", type="messages")
    msg = gr.Textbox(label='User Input')

    def clear(chat_history):
        """Handles clearing chat"""
        chat_history.clear()
        native_chat_history.clear()

    chatbot.clear(clear, inputs=[chatbot])

    def respond(message, chat_history):
        """Handles bot response generation"""
        global native_chat_history

        # user message: Breton -> French
        fr_message = translate(message, forward=False)
        print(f"user fr -> {fr_message}")

        # generate the answer in French
        prompt = format_prompt_with_history(fr_message, native_chat_history)
        bot_fr_message = answer(prompt)
        print(f"bot fr -> {bot_fr_message}")

        # bot answer: French -> Breton
        bot_br_message = translate(bot_fr_message, forward=True)
        print(f"bot br -> {bot_br_message}")

        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": bot_br_message})

        native_chat_history.append({"role": "user", "content": fr_message})
        native_chat_history.append({"role": "assistant", "content": bot_fr_message})

        # limit the history length
        if len(chat_history) > max_history_length * 2:
            chat_history = chat_history[-max_history_length * 2:]
            native_chat_history = native_chat_history[-max_history_length * 2:]

        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    demo.launch()
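
# A minimal usage sketch, assuming this script is saved as `app.py` and the
# dependencies (`gradio`, `transformers`, `torch`, and `gguf` for GGUF loading)
# are installed:
#
#   python app.py
#
# Gradio then serves the UI locally (by default at http://127.0.0.1:7860).
# The `translate` helper can also be smoke-tested on its own, e.g.:
#
#   translate("Bonjour !", forward=True)   # expect a Breton translation back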