Spaces:
Sleeping
Sleeping
File size: 2,893 Bytes
9484ade d5d353f 9484ade d5d353f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Table of predefined question-answer pairs, loaded once at start-up.
def _strip_label(text, label):
    """Return the part of ``text`` that follows ``label``.

    Gives the same result as ``text.split(label)[1]`` whenever the label is
    present, but returns ``text`` unchanged (instead of raising IndexError)
    when a cell does not contain the label.
    """
    pieces = text.split(label)
    return pieces[1] if len(pieces) > 1 else text


qna_df = pd.read_csv('./data/qa_data.csv')[['질문', '답변']]
# Each cell presumably starts with a "질문\n" / "답변\n" label (verify against
# the CSV); keep only the text after it.
qna_df['질문'] = qna_df['질문'].apply(lambda x: _strip_label(x, '질문\n'))  # remove "질문\n"
qna_df['답변'] = qna_df['답변'].apply(lambda x: _strip_label(x, '답변\n'))  # remove "답변\n"

# Load the SentenceTransformer model
embedding_model = SentenceTransformer('jeonseonjin/embedding_BAAI-bge-m3')

# Create embedding vectors for the stored question sentences
query_texts = qna_df['질문'].to_list()
query_embeddings = embedding_model.encode(query_texts)
# Query-answer lookup function
def qna_answer_to_query(new_query, embedding_model=embedding_model, query_embeddings=query_embeddings, top_k=1, verbose=True):
    """Find the stored question closest to ``new_query`` and return its answer.

    Args:
        new_query: Question text to look up.
        embedding_model: Object whose ``encode`` method embeds a list of
            texts. Defaults to the module-level model.
        query_embeddings: Embeddings of the stored questions. The index of
            the best match is used to look up both ``query_texts`` and
            ``qna_df``, so the rows must be aligned with them.
        top_k: Accepted for compatibility but currently unused; only the
            single best match is returned.
        verbose: When truthy, print the matched question, its similarity
            and its answer.

    Returns:
        Tuple ``(similar_query, similarity, similar_answer)`` where
        ``similarity`` is the cosine similarity rounded to two decimals.
    """
    # Embed the incoming question (passed as a one-element list)
    new_query_embedding = embedding_model.encode([new_query])

    # Cosine similarity against every stored question; a single query was
    # encoded, so only row 0 is relevant.
    scores = cosine_similarity(new_query_embedding, query_embeddings)[0]

    # Index of the stored question with the highest similarity
    most_similar_idx = int(np.argmax(scores))
    similarity = np.round(scores[most_similar_idx], 2)

    # Fetch the most similar question and its answer
    similar_query = query_texts[most_similar_idx]
    similar_answer = qna_df.iloc[most_similar_idx]['답변']

    if verbose:
        print("가장 비슷한 질문 : ", similar_query)
        print("가장 비슷한 질문의 유사도 : ", similarity)
        print("가장 비슷한 질문의 답: ", similar_answer)

    # Return the result
    return similar_query, similarity, similar_answer
import gradio as gr
# Chat handler that answers a question (uses qna_answer_to_query)
def chat_with(message, history):
    """Answer ``message`` and record the exchange in the chat history.

    Args:
        message: Text submitted by the user.
        history: List of ``(user_message, bot_response)`` tuples so far;
            ``None`` is treated as an empty history.

    Returns:
        Tuple ``(history, "")``. The submit event in this file wires the
        outputs as ``[chatbot, msg]``, so the first value refreshes the
        chatbot and the empty string clears the input textbox. Returning
        the history for the textbox as well would fill it with a
        stringified list.
    """
    # qna_answer_to_query returns (question, similarity, answer); only the
    # answer text is shown to the user.
    response = qna_answer_to_query(message)[2]

    if history is None:
        history = []
    # Store the question and its answer in the conversation history
    history.append((message, response))

    return history, ""
# Build the Gradio chatbot interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()  # component that displays the conversation history
    msg = gr.Textbox(label="질문 입력")  # text box for entering a question
    clear = gr.Button("대화 기록 초기화")  # button that resets the conversation history

    # On submit, chat_with receives (msg, chatbot) and its return values are
    # assigned to the outputs in order: chatbot first, then msg.
    msg.submit(chat_with, inputs=[msg, chatbot], outputs=[chatbot, msg])

    # Reset button: replace the chatbot contents with an empty history
    clear.click(lambda: [], None, chatbot, queue=False)

# Launch the app
demo.launch(share=True)
|