"""Gradio chat app for the "豌豆" persona.

Pipeline, in the order the module executes it:

1. Load example dialogs, a chat corpus and a knowledge list from JSON files.
2. Embed corpus + knowledge texts and store them in a flat L2 FAISS index.
3. Load a causal LLM and wrap it in a text-generation pipeline.
4. Build the persona prompt template.
5. Define hybrid (keyword + vector) retrieval and the ``chat`` handler.
6. Build the decorated Gradio UI and launch it when run as a script.
"""

import heapq
import json
import logging
import random

import faiss
import gradio as gr
import jieba
import jieba.analyse
import numpy as np
import torch
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from numpy.linalg import norm
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

logger = logging.getLogger(__name__)

EMBEDDING_MODEL_NAME = "BAAI/bge-base-zh"
# Number of most recent history messages that are fed back into the prompt.
HISTORY_WINDOW = 8


def _load_json(path):
    """Return the parsed content of the UTF-8 encoded JSON file at *path*."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


# ========= Step 1: load data =========
dialog_data = _load_json("dialog.json")
corpus_texts = _load_json("corpus.json")
knowledge_texts = _load_json("knowledge.json")

docs = [Document(page_content=text) for text in corpus_texts]
knowledge_docs = [Document(page_content=text) for text in knowledge_texts]
# Order matters: position i in all_docs is vector i in the FAISS index below.
all_docs = docs + knowledge_docs

# ========= Step 2: embeddings and vector index =========
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
embeddings = embedding_model.encode(
    corpus_texts + knowledge_texts, show_progress_bar=True
)
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
# FAISS indexes store float32 vectors; make the dtype explicit.
index.add(np.asarray(embeddings, dtype="float32"))

docstore = {str(i): doc for i, doc in enumerate(all_docs)}
index_to_docstore_id = {int(key): key for key in docstore}

# NOTE(review): `vectorstore` / `retriever` are not used by `chat` in this
# file (it calls `hybrid_retrieval` on `index` directly); they are kept as
# module-level names in case something else imports them.
vectorstore = FAISS(
    embedding_function=HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME),
    index=index,
    docstore=InMemoryDocstore(docstore),
    index_to_docstore_id=index_to_docstore_id,
)
retriever = vectorstore.as_retriever()

# ========= Step 3: language model =========
model_name = "deepseek-ai/deepseek-llm-7b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Fall back to the EOS id so that None is never passed as the padding id.
_pad_token_id = tokenizer.pad_token_id
if _pad_token_id is None:
    _pad_token_id = tokenizer.eos_token_id

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=100,
    temperature=0.8,
    top_p=0.9,
    do_sample=True,
    repetition_penalty=1.2,
    return_full_text=False,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=_pad_token_id,
)
llm = HuggingFacePipeline(pipeline=pipe)

# ========= Step 4: prompt template =========
system_prompt = (
    "你是豌豆,一个毒舌但可爱的微信好友。讲话要尖锐、嘴碎、有攻击性、但又让人上头。\n"
    "你会用emoji、口语、网络热词说话,像个懂事但不好惹的姐姐。\n"
    "你不是AI,也不是机器人,就是我微信里的老朋友。跟你对话的都是你的好朋友,但你要对他们刻薄有攻击性\n"
    "记住,你的风格是“毒中带撩”,回答要短、狠、有情绪。\n"
    "千万别客气,别解释太多,更别太官方,懂?"
)

prompt_template = PromptTemplate(
    input_variables=["system", "examples", "context", "question"],
    template="""{system}

👇以下是你平时说话的风格参考(不许忘):
{examples}

👇这是我和你以前的聊天记录,你的语气都在这了:
{context}

现在我问你:
{question}

你要怎么回我?记得口语化、毒舌点、别啰嗦:
""",
)


# ========= Step 5: retrieval and chat logic =========
def extract_keywords(text, topk=5):
    """Return up to *topk* keywords of *text* as ranked by ``jieba.analyse``."""
    return jieba.analyse.extract_tags(text, topK=topk)


def hybrid_retrieval(query, corpus_docs, faiss_index, embedding_model, k=3,
                     kw_weight=2.0, vec_weight=1.0):
    """Return the *k* documents that best match *query*.

    Every document gets ``kw_weight * keyword_hits + vec_weight * closeness``:

    * ``keyword_hits`` is how many of the query's top-5 jieba keywords occur
      as substrings of the document text;
    * ``closeness`` is ``1 / (L2 distance + 1e-5)`` between the query
      embedding and the document vector read back from *faiss_index*.

    Args:
        query: Text to search for.
        corpus_docs: Documents exposing ``page_content``; document ``i`` must
            correspond to vector ``i`` of *faiss_index*.
        faiss_index: Index supporting ``reconstruct(i)``.
        embedding_model: Object whose ``encode([text])`` returns one vector
            per input text.
        k: Number of documents to return; values <= 0 yield an empty list.
        kw_weight: Weight of the keyword score.
        vec_weight: Weight of the vector score.

    Returns:
        At most *k* documents, highest score first.
    """
    if k <= 0 or not corpus_docs:
        return []

    query_embedding = embedding_model.encode([query])[0]
    keywords = extract_keywords(query, topk=5)

    def score(entry):
        position, doc = entry
        keyword_score = sum(1 for kw in keywords if kw in doc.page_content)
        doc_embedding = faiss_index.reconstruct(position)
        # The epsilon keeps the division finite for an exact vector match.
        vector_score = 1 / (norm(query_embedding - doc_embedding) + 1e-5)
        return kw_weight * keyword_score + vec_weight * vector_score

    # nlargest is documented as equivalent to sorted(..., reverse=True)[:k]
    # but does not need to order the whole corpus.
    best = heapq.nlargest(k, enumerate(corpus_docs), key=score)
    return [doc for _, doc in best]


def choose_fallback_topic(user_input, knowledge_docs):
    """Pad very short input with a random knowledge entry.

    When the stripped input is shorter than 5 characters, a random entry from
    *knowledge_docs* is appended after a comma. Entries containing a question
    mark (ASCII ``?`` or full-width ``？``) are preferred; if there are none,
    any entry may be chosen. Otherwise, or when *knowledge_docs* is empty,
    *user_input* is returned unchanged.
    """
    if len(user_input.strip()) >= 5:
        return user_input

    question_marks = ("?", "？")
    candidates = [
        doc.page_content
        for doc in knowledge_docs
        if any(mark in doc.page_content for mark in question_marks)
    ]
    if not candidates:
        candidates = [doc.page_content for doc in knowledge_docs]
    if not candidates:
        return user_input
    return f"{user_input},{random.choice(candidates)}"


def chat(user_input, history):
    """Generate a reply to *user_input* and return the updated history twice.

    Only the last ``HISTORY_WINDOW`` messages are placed in the prompt, but
    the complete history is kept and returned so the visible conversation
    does not lose older messages.

    Args:
        user_input: Text typed by the user.
        history: List of ``{"role": ..., "content": ...}`` messages, or a
            falsy value for a fresh conversation. The list is not mutated.

    Returns:
        ``(history, history)`` - the same new list, once for the chatbot
        component and once for the state component.
    """
    history = list(history or [])
    recent_messages = history[-HISTORY_WINDOW:]

    prompt_question = choose_fallback_topic(user_input, knowledge_docs)

    context_text = "\n".join(
        f"用户:{msg['content']}" if msg["role"] == "user"
        else f"sophia:{msg['content']}"
        for msg in recent_messages
    )

    retrieved_docs = hybrid_retrieval(
        query=prompt_question,
        corpus_docs=all_docs,
        faiss_index=index,
        embedding_model=embedding_model,
        k=3,
    )
    retrieved_context = "\n".join(doc.page_content for doc in retrieved_docs)

    # The first five recorded dialog pairs act as few-shot style examples.
    example_text = "\n".join(
        f"user:{pair['user']}\nsophia:{pair['sophia']}"
        for pair in dialog_data[:5]
    )

    prompt = prompt_template.format(
        system=system_prompt,
        examples=example_text,
        context=retrieved_context + "\n" + context_text,
        question=prompt_question,
    )

    try:
        reply = llm.invoke(prompt)
    except Exception as e:
        # Deliberate best-effort: show the error in the chat instead of
        # crashing the UI, but keep the traceback in the logs.
        logger.exception("LLM invocation failed")
        reply = f"勾巴出错了:{e}"

    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": reply})
    return history, history


# ========= Step 6: page decoration and UI =========
_ASSET_BASE = "https://huggingface.co/spaces/Ronaldo1111/Sophia/resolve/main"

# family.jpg, family1.jpg, ..., family9.jpg
background_images = [
    f"{_ASSET_BASE}/family{suffix}.jpg" for suffix in [""] + list(range(1, 10))
]
# One keyframe per image, spread evenly (0%, 10%, ..., 90% for ten images).
background_css_rules = "".join(
    f" {i * 100 // len(background_images)}% "
    f"{{ background-image: url('{img}'); }}\n"
    for i, img in enumerate(background_images)
)
background_css = f"@keyframes backgroundCycle {{\n{background_css_rules}}}"

avatar_url = f"{_ASSET_BASE}/bean.jpg"
cake_url = f"{_ASSET_BASE}/birthday.jpg"
gift_url = f"{_ASSET_BASE}/gift.jpg"
popup_url = f"{_ASSET_BASE}/srkl.jpg"
popup2_url = f"{_ASSET_BASE}/srkl1.jpg"
music1 = f"{_ASSET_BASE}/FNG.mp3"
music2 = f"{_ASSET_BASE}/PGY.mp3"
bark_sound = f"{_ASSET_BASE}/voice.mp3"

# The template uses literal CSS/JS braces, so placeholders such as
# {avatar_url} are filled with str.replace below rather than str.format.
# NOTE(review): depending on the Gradio version, <script> tags inside gr.HTML
# may not be executed - confirm the interactive parts actually run.
# NOTE(review): the background <audio> has the `loop` attribute, under which
# the "ended" handler below likely never fires - confirm the intended
# auto-advance behaviour.
# NOTE(review): balloon2-5 URLs are spelled "ballon"; confirm against the
# hosted file names before changing them.
html_template = '''
<style>
body {
  margin: 0;
  animation: backgroundCycle 60s infinite;
  background-size: cover;
  background-position: center;
  transition: background-image 1s ease-in-out;
}
{background_css}
.gr-chatbot {
  background: rgba(255, 255, 255, 0.3) !important; /* 更轻的透明白 */
  border-radius: 16px;
  padding: 10px;
  backdrop-filter: blur(12px); /* 毛玻璃核心效果 */
  -webkit-backdrop-filter: blur(12px); /* 兼容 Safari */
  border: 1px solid rgba(255, 255, 255, 0.4); /* 边框更精致 */
}
.gr-textbox textarea {
  font-family: monospace;
  font-size: 1.1em;
  animation: typewriter 1s steps(40, end);
}
@keyframes typewriter {
  from { width: 0 }
  to { width: 100% }
}
#sophia-avatar {
  position: fixed;
  top: 40px;
  left: 30px;
  width: 80px;
  height: 80px;
  border-radius: 50%;
  z-index: 9999;
  cursor: grab;
  animation: spinBounce 4s infinite;
}
@keyframes spinBounce {
  0% { transform: rotate(0deg) translateY(0); }
  50% { transform: rotate(180deg) translateY(-10px); }
  100% { transform: rotate(360deg) translateY(0); }
}
#birthday-cake {
  position: fixed;
  bottom: 20px;
  right: 20px;
  width: 80px;
  animation: bounce 1.5s infinite;
  z-index: 9999;
}
@keyframes bounce {
  0% { transform: translateY(0); }
  50% { transform: translateY(-15px); }
  100% { transform: translateY(0); }
}
#gift {
  position: fixed;
  width: 60px;
  cursor: pointer;
  z-index: 9998;
  animation: moveAround 10s infinite linear;
}
@keyframes moveAround {
  0% { top: 10%; left: 10%; }
  25% { top: 20%; left: 80%; }
  50% { top: 70%; left: 60%; }
  75% { top: 80%; left: 20%; }
  100% { top: 10%; left: 10%; }
}
#popup, #popup2 {
  display: none;
  position: fixed;
  top: 50%;
  left: 50%;
  transform: translate(-50%, -50%);
  max-width: 80vw;
  max-height: 80vh;
  z-index: 10000;
  border: 4px solid #fff;
  border-radius: 12px;
  box-shadow: 0 0 20px rgba(0,0,0,0.5);
}
#popup-close {
  position: absolute;
  top: 8px;
  right: 12px;
  font-size: 24px;
  color: #fff;
  cursor: pointer;
  z-index: 10001;
}
#firework {
  position: fixed;
  top: 50%;
  left: 50%;
  width: 120px;
  height: 120px;
  background: url("https://huggingface.co/spaces/Ronaldo1111/Sophia/resolve/main/firework.gif") no-repeat center center;
  background-size: contain;
  z-index: 99999;
  animation: fadeOut 1s ease-out forwards;
}
@keyframes fadeOut {
  0% { opacity: 1; }
  100% { opacity: 0; }
}
.balloon {
  position: fixed;
  width: 60px;
  height: 80px;
  background-size: contain;
  background-repeat: no-repeat;
  z-index: 10000; /* 使气球位于对话框之上 */
  animation: floatUp 12s linear infinite;
}
#balloon1 {
  background-image: url("https://huggingface.co/spaces/Ronaldo1111/Sophia/resolve/main/balloon1.png");
  left: 10%;
  top: 0; /* 确保气球从页面顶部开始 */
  animation-delay: 0s;
}
#balloon2 {
  background-image: url("https://huggingface.co/spaces/Ronaldo1111/Sophia/resolve/main/ballon2.png");
  left: 30%;
  top: 0; /* 确保气球从页面顶部开始 */
  animation-delay: 2s;
}
#balloon3 {
  background-image: url("https://huggingface.co/spaces/Ronaldo1111/Sophia/resolve/main/ballon3.png");
  left: 50%;
  top: 0; /* 确保气球从页面顶部开始 */
  animation-delay: 4s;
}
#balloon4 {
  background-image: url("https://huggingface.co/spaces/Ronaldo1111/Sophia/resolve/main/ballon4.png");
  left: 70%;
  top: 0; /* 确保气球从页面顶部开始 */
  animation-delay: 6s;
}
#balloon5 {
  background-image: url("https://huggingface.co/spaces/Ronaldo1111/Sophia/resolve/main/ballon5.png");
  left: 90%;
  top: 0; /* 确保气球从页面顶部开始 */
  animation-delay: 8s;
}
@keyframes floatUp {
  0% { transform: translateY(0); }
  100% { transform: translateY(-120vh); }
}
#music-toggle, #next-track {
  position: fixed;
  padding: 8px 12px;
  font-size: 14px;
  background: rgba(255,255,255,0.7);
  border-radius: 8px;
  cursor: pointer;
  z-index: 10000;
}
#music-toggle { bottom: 20px; left: 20px; }
#next-track { bottom: 60px; left: 20px; }
</style>
<img id="sophia-avatar" src="{avatar_url}" />
<img id="birthday-cake" src="{cake_url}" />
<img id="gift" src="{gift_url}" />
<img id="popup" />
<img id="popup2" />
<div id="popup-close">×</div>
<div id="music-toggle">⏸️音乐</div>
<div id="next-track">🎵切歌</div>
<div id="balloon1" class="balloon"></div>
<div id="balloon2" class="balloon"></div>
<div id="balloon3" class="balloon"></div>
<div id="balloon4" class="balloon"></div>
<div id="balloon5" class="balloon"></div>
<audio id="bg-music" autoplay loop>
  <source src="{music1}" type="audio/mpeg" />
</audio>
<audio id="bark" src="{bark_sound}"></audio>
<script>
const tracks = ["{music1}", "{music2}"];
const audio = document.getElementById("bg-music");
let current = 0;
audio.addEventListener("ended", () => {
  current = (current + 1) % tracks.length;
  audio.src = tracks[current];
  audio.load();
  audio.play();
});
const toggleBtn = document.getElementById("music-toggle");
toggleBtn.addEventListener("click", () => {
  if (audio.paused) {
    audio.play();
    toggleBtn.textContent = "⏸️音乐";
  } else {
    audio.pause();
    toggleBtn.textContent = "▶️音乐";
  }
});
document.getElementById("next-track").addEventListener("click", () => {
  current = (current + 1) % tracks.length;
  audio.src = tracks[current];
  audio.load();
  audio.play();
});
const avatar = document.getElementById("sophia-avatar");
const bark = document.getElementById("bark");
avatar.onmousedown = function(e) {
  const shiftX = e.clientX - avatar.getBoundingClientRect().left;
  const shiftY = e.clientY - avatar.getBoundingClientRect().top;
  function moveAt(e) {
    avatar.style.left = e.pageX - shiftX + 'px';
    avatar.style.top = e.pageY - shiftY + 'px';
  }
  document.addEventListener('mousemove', moveAt);
  avatar.onmouseup = () => {
    document.removeEventListener('mousemove', moveAt);
    avatar.onmouseup = null;
  };
};
avatar.ondragstart = () => false;
avatar.addEventListener("click", () => {
  bark.pause();
  bark.currentTime = 0;
  bark.play();
  const fw = document.createElement("div");
  fw.id = "firework";
  document.body.appendChild(fw);
  setTimeout(() => fw.remove(), 1200);
});
const gift = document.getElementById("gift");
const popup = document.getElementById("popup");
const popup2 = document.getElementById("popup2");
const closeBtn = document.getElementById("popup-close");
gift.addEventListener("click", () => {
  popup.src = "{popup_url}";
  popup.style.display = "block";
  closeBtn.style.display = "block";
  setTimeout(() => {
    popup2.src = "{popup2_url}";
    popup2.style.display = "block";
  }, 2000);
  setTimeout(() => {
    popup.style.display = "none";
    popup2.style.display = "none";
    closeBtn.style.display = "none";
  }, 5000);
});
closeBtn.addEventListener("click", () => {
  popup.style.display = "none";
  popup2.style.display = "none";
  closeBtn.style.display = "none";
});
</script>
'''

_TEMPLATE_VALUES = {
    "{background_css}": background_css,
    "{avatar_url}": avatar_url,
    "{cake_url}": cake_url,
    "{music1}": music1,
    "{music2}": music2,
    "{bark_sound}": bark_sound,
    "{gift_url}": gift_url,
    "{popup_url}": popup_url,
    "{popup2_url}": popup2_url,
}
html_content = html_template
for _placeholder, _value in _TEMPLATE_VALUES.items():
    html_content = html_content.replace(_placeholder, _value)

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.HTML(html_content)
    gr.Markdown("## 🌸 Horse and 7 Agent:欢迎进入豌豆的世界 🌸")
    chatbot = gr.Chatbot(label="Pea", type="messages", show_copy_button=True)
    msg = gr.Textbox(
        label="想对豌豆说啥?",
        placeholder="小勾巴,你在干嘛?",
        lines=2,
    )
    state = gr.State([])
    btn = gr.Button("投喂")

    # Clicking the button and pressing Enter in the textbox do the same thing.
    for _trigger in (btn.click, msg.submit):
        _trigger(chat, inputs=[msg, state], outputs=[chatbot, state])

if __name__ == "__main__":
    demo.launch()