"""Gradio chat UI around the Qwen2-0.5B-Instruct model, run on CPU."""

import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Intended to disable the Hugging Face cache to save storage.
# NOTE(review): this is set after `transformers` has been imported, and it is
# unclear whether the hub library honours this variable at all -- confirm
# against the huggingface_hub environment-variable reference.
os.environ["HF_HUB_DISABLE_CACHE"] = "1"

# Number of most recent (user, assistant) turns replayed into the prompt.
MAX_HISTORY_TURNS = 5
# Upper bound on generated tokens per reply, kept small for speed.
MAX_NEW_TOKENS = 200

# Load the model and tokenizer once at import time.
model_name = "Qwen/Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # bfloat16 to reduce memory use
    device_map="cpu",  # force CPU (free Space tier)
    trust_remote_code=True,
    low_cpu_mem_usage=True,  # memory optimisation while loading
)


def _pairs_to_messages(pairs):
    """Flatten ``(user_msg, bot_msg)`` pairs into role/content message dicts."""
    messages = []
    for user_msg, bot_msg in pairs:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    return messages


def generate_response(user_input, chat_history):
    """Generate a model reply and return the updated chat display and state.

    Args:
        user_input: Text typed by the user.
        chat_history: List of ``(user_msg, bot_msg)`` tuples from previous
            turns; a falsy value is treated as an empty history.

    Returns:
        A ``(messages, chat_history)`` tuple. ``messages`` is a list of
        ``{"role": ..., "content": ...}`` dicts for the Chatbot component and
        ``chat_history`` is the (possibly extended) list of turn tuples.
    """
    if not chat_history:
        chat_history = []

    if not user_input or not user_input.strip():
        # Keep the existing conversation visible and append the warning,
        # instead of replacing the whole display with the warning alone.
        warning = {"role": "assistant", "content": "Masukkan teks tidak boleh kosong!"}
        return _pairs_to_messages(chat_history) + [warning], chat_history

    # Only the most recent turns are replayed, to keep the prompt short.
    messages = _pairs_to_messages(chat_history[-MAX_HISTORY_TURNS:])
    messages.append({"role": "user", "content": user_input})

    # Build the prompt with the tokenizer's chat template.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # The template output is tokenized without adding special tokens again.
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to("cpu")

    outputs = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=True,
        temperature=0.75,
        top_p=0.85,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=True,  # reuse cached state during generation
    )

    # Decode only the newly generated tokens (everything after the prompt).
    prompt_length = inputs.input_ids.shape[-1]
    bot_response = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    chat_history.append((user_input, bot_response))

    # Each pair yields one "user" and one "assistant" message, in that order.
    return _pairs_to_messages(chat_history), chat_history


def clear_history():
    """Return empty values for both the Chatbot display and the history state."""
    return [], []


# Gradio interface
with gr.Blocks(
    theme=gr.themes.Monochrome(),
    css="""
    #chatbot {border-radius: 10px; border: 1px solid #e0e0e0; padding: 10px;}
    .gradio-container {max-width: 800px; margin: auto;}
    #input-box {border-radius: 8px;}
    #submit-btn, #clear-btn {border-radius: 8px; background: #007bff; color: white;}
    #submit-btn:hover, #clear-btn:hover {background: #0056b3;}
    """,
) as demo:
    gr.Markdown(
        """
        # 💬 Chatbot Qwen (Alibaba)
        Ajukan pertanyaan dan dapatkan respons cerdas dari model Qwen2-0.5B-Instruct!
        """
    )

    # UI components
    chatbot = gr.Chatbot(
        type="messages",  # list of role/content dicts
        height=450,
        show_label=False,
        elem_id="chatbot",
    )
    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Ketik pertanyaanmu di sini...",
            show_label=False,
            elem_id="input-box",
            scale=4,
        )
        submit_button = gr.Button("Kirim", elem_id="submit-btn", scale=1)
    clear_button = gr.Button("Hapus Riwayat", elem_id="clear-btn")

    # State holding the conversation as a list of (user, assistant) tuples.
    chat_history = gr.State([])

    # Button actions
    submit_button.click(
        fn=generate_response,
        inputs=[user_input, chat_history],
        outputs=[chatbot, chat_history],
    )
    clear_button.click(
        fn=clear_history,
        inputs=None,
        outputs=[chatbot, chat_history],
    )

# Launch the application
demo.launch()