import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import torch
import os
from threading import Thread
# Disable the Hugging Face Hub cache to save storage
os.environ["HF_HUB_DISABLE_CACHE"] = "1"
# Load the model and tokenizer
model_name = "Qwen/Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,  # bfloat16 for CPU efficiency
    device_map="cpu",            # force CPU for the free Space tier
    trust_remote_code=True,
    low_cpu_mem_usage=True       # memory optimization
)
# Generate a response for the current input and conversation history
def generate_response(user_input, chat_history):
    if not user_input.strip():
        return [("Error", "Input text must not be empty!")], chat_history
    if not chat_history:
        chat_history = []

    # Format the conversation history (keep only the last 5 turns for efficiency)
    messages = []
    for user_msg, bot_msg in chat_history[-5:]:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})

    # Add the current user input
    messages.append({"role": "user", "content": user_input})

    # Build the prompt with Qwen's chat template
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Tokenize the input
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to("cpu")
    # Use TextIteratorStreamer so the generated text can be collected as it is produced
    # (the original TextStreamer only printed to stdout and the response was regenerated a second time)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = dict(
        **inputs,
        max_new_tokens=200,  # limit new tokens for speed
        do_sample=True,
        temperature=0.75,
        top_p=0.85,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=True,      # KV cache for faster inference
        streamer=streamer,
    )

    # Run generation in a separate thread for responsiveness and collect the streamed text
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    bot_response = "".join(chunk for chunk in streamer)
    thread.join()
    # Update the conversation history
    chat_history.append((user_input, bot_response))

    # Format the output for the Gradio Chatbot component
    return [(user_msg, bot_msg) for user_msg, bot_msg in chat_history], chat_history
# Clear the conversation history
def clear_history():
    return [], []
# Gradio interface
with gr.Blocks(
    theme=gr.themes.Monochrome(),  # clean, modern theme
    css="""
    #chatbot {border-radius: 10px; border: 1px solid #e0e0e0;}
    .gradio-container {max-width: 800px; margin: auto;}
    #input-box {border-radius: 8px;}
    #submit-btn, #clear-btn {border-radius: 8px; background: #007bff; color: white;}
    #submit-btn:hover, #clear-btn:hover {background: #0056b3;}
    """
) as demo:
    gr.Markdown(
        """
        # 💬 Qwen Chatbot (Alibaba)
        Ask a question and get a smart response from the Qwen2-0.5B-Instruct model!
        """
    )

    # UI components
    chatbot = gr.Chatbot(
        label="Conversation",
        height=450,
        show_label=False,
        elem_id="chatbot",
        bubble_full_width=False
    )
    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Type your question here...",
            show_label=False,
            elem_id="input-box",
            scale=4
        )
        submit_button = gr.Button("Send", elem_id="submit-btn", scale=1)
    clear_button = gr.Button("Clear History", elem_id="clear-btn")

    # State holding the conversation history
    chat_history = gr.State([])

    # Button actions
    submit_button.click(
        fn=generate_response,
        inputs=[user_input, chat_history],
        outputs=[chatbot, chat_history],
    ).then(lambda: "", None, user_input)  # clear the input box after sending
    clear_button.click(
        fn=clear_history,
        inputs=None,
        outputs=[chatbot, chat_history]
    )
# Launch the app
demo.launch()