import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import os

# Disable the Hugging Face hub cache to save storage on the free Space.
# NOTE(review): "HF_HUB_DISABLE_CACHE" does not appear to be a documented
# huggingface_hub environment variable (HF_HUB_OFFLINE / HF_HOME are) —
# confirm this actually has an effect.
os.environ["HF_HUB_DISABLE_CACHE"] = "1"

# Load the model and tokenizer once at module import time; both are used
# as module-level globals by generate_response below.
model_name = "Qwen/Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # bfloat16 to cut memory; NOTE(review): may be slow on CPUs without native bf16 support — confirm
    device_map="cpu",           # Force CPU: the free Space tier has no GPU
    trust_remote_code=True,
    low_cpu_mem_usage=True      # Stream weights during load to limit peak RAM
)

# Fungsi untuk menghasilkan respons
def _history_to_messages(history):
    """Convert [(user, bot), ...] tuple history into Gradio 'messages' format.

    Returns a flat list of {"role": ..., "content": ...} dicts, alternating
    user/assistant, suitable for a gr.Chatbot(type="messages") component.
    """
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    return messages


def generate_response(user_input, chat_history):
    """Generate an assistant reply for ``user_input`` given prior turns.

    Args:
        user_input: Raw text from the input textbox.
        chat_history: List of (user, bot) tuples held in gr.State; may be
            None/empty on the first call.

    Returns:
        A pair ``(display_messages, chat_history)`` where display_messages
        is the full conversation in Gradio 'messages' format and
        chat_history is the updated tuple-based state.
    """
    if not chat_history:
        chat_history = []

    if not user_input.strip():
        # Bug fix: keep the existing conversation visible instead of
        # replacing the whole Chatbot display with just the error bubble.
        # State is returned unchanged so display and state stay in sync.
        return _history_to_messages(chat_history) + [
            {"role": "assistant", "content": "Masukkan teks tidak boleh kosong!"}
        ], chat_history

    # Build the prompt context from the last 5 turns only, to keep CPU
    # inference fast; the full history is still shown in the UI.
    messages = _history_to_messages(chat_history[-5:])
    messages.append({"role": "user", "content": user_input})

    # Render the conversation with Qwen's chat template.
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Tokenize; the template already inserted special tokens, so skip them here.
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to("cpu")

    # Inference only: disable autograd to avoid tracking gradients
    # (saves memory and time on a CPU-only Space).
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,  # Cap output length for responsiveness
            do_sample=True,
            temperature=0.75,
            top_p=0.85,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True       # KV cache for faster decoding
        )

    # Decode only the newly generated tokens (skip the echoed prompt).
    bot_response = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)

    # Persist the new turn, then re-render the whole conversation.
    chat_history.append((user_input, bot_response))
    return _history_to_messages(chat_history), chat_history

# Fungsi untuk menghapus riwayat
def clear_history():
    """Reset both the Chatbot display and the stored conversation state."""
    empty_display, empty_state = [], []
    return empty_display, empty_state

# Antarmuka Gradio
# Gradio interface: single-column chat layout with a send button and a
# history-clearing button. Conversation state lives in gr.State as a list
# of (user, bot) tuples; the Chatbot component renders 'messages' dicts.
with gr.Blocks(
    theme=gr.themes.Monochrome(),  # Clean, modern theme
    css="""
        #chatbot {border-radius: 10px; border: 1px solid #e0e0e0; padding: 10px;}
        .gradio-container {max-width: 800px; margin: auto;}
        #input-box {border-radius: 8px;}
        #submit-btn, #clear-btn {border-radius: 8px; background: #007bff; color: white;}
        #submit-btn:hover, #clear-btn:hover {background: #0056b3;}
    """
) as demo:
    gr.Markdown(
        """
        # 💬 Chatbot Qwen (Alibaba)
        Ajukan pertanyaan dan dapatkan respons cerdas dari model Qwen2-0.5B-Instruct!
        """
    )

    # UI components
    chatbot = gr.Chatbot(
        type="messages",  # 'messages' format matches generate_response output
        height=450,
        show_label=False,
        elem_id="chatbot"
    )
    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Ketik pertanyaanmu di sini...",
            show_label=False,
            elem_id="input-box",
            scale=4
        )
        submit_button = gr.Button("Kirim", elem_id="submit-btn", scale=1)

    clear_button = gr.Button("Hapus Riwayat", elem_id="clear-btn")

    # Conversation state: list of (user, bot) tuples
    chat_history = gr.State([])

    # Wire both the button click and Enter in the textbox to the same
    # handler (bug fix: previously Enter did nothing).
    submit_button.click(
        fn=generate_response,
        inputs=[user_input, chat_history],
        outputs=[chatbot, chat_history]
    )
    user_input.submit(
        fn=generate_response,
        inputs=[user_input, chat_history],
        outputs=[chatbot, chat_history]
    )
    clear_button.click(
        fn=clear_history,
        inputs=None,
        outputs=[chatbot, chat_history]
    )

# Launch the app
demo.launch()