File size: 2,893 Bytes
9484ade
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5d353f
 
9484ade
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5d353f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the predefined question/answer table and keep only the two columns
# this script reads.
_raw_table = pd.read_csv('./data/qa_data.csv')
qna_df = _raw_table[['์งˆ๋ฌธ', '๋‹ต๋ณ€']]

# Every cell carries a "<column name>\n" marker; keep the text that follows it.
qna_df['์งˆ๋ฌธ'] = [cell.split('์งˆ๋ฌธ\n')[1] for cell in qna_df['์งˆ๋ฌธ']]
qna_df['๋‹ต๋ณ€'] = [cell.split('๋‹ต๋ณ€\n')[1] for cell in qna_df['๋‹ต๋ณ€']]

# Load the SentenceTransformer embedding model.
embedding_model = SentenceTransformer('jeonseonjin/embedding_BAAI-bge-m3')

# Embed every stored question once, up front, so that each lookup only has
# to encode the incoming query.
query_texts = qna_df['์งˆ๋ฌธ'].tolist()
query_embeddings = embedding_model.encode(query_texts)

# Nearest-question lookup over the precomputed question embeddings.
def qna_answer_to_query(
    new_query,
    embedding_model=embedding_model,
    query_embeddings=query_embeddings,
    top_k=1,
    verbose=True,
):
    """Return the stored question most similar to ``new_query`` and its answer.

    Args:
        new_query: Query text to match against the stored questions.
        embedding_model: Object whose ``encode`` method turns a list of texts
            into embedding vectors. Defaults to the module-level model.
        query_embeddings: Embeddings of the stored questions, row-aligned
            with ``query_texts`` and ``qna_df``. Defaults to the module-level
            embeddings.
        top_k: Accepted for interface compatibility but currently unused;
            only the single best match is returned.
        verbose: When truthy, print the matched question, its similarity
            score and its answer.

    Returns:
        A ``(similar_query, similarity, similar_answer)`` tuple, where
        ``similarity`` is the cosine similarity rounded to two decimals.
    """
    # Embed the incoming query (encode expects a list of texts).
    new_query_embedding = embedding_model.encode([new_query])

    # A single query row goes in, so row 0 holds the score against every
    # stored question.
    scores = cosine_similarity(new_query_embedding, query_embeddings)[0]

    # Position of the best-scoring stored question.
    most_similar_idx = int(np.argmax(scores))
    similarity = np.round(scores[most_similar_idx], 2)

    # Fetch the matching question text and its answer by the same position.
    similar_query = query_texts[most_similar_idx]
    similar_answer = qna_df.iloc[most_similar_idx]['๋‹ต๋ณ€']

    if verbose:
        print("๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ : ", similar_query)
        print("๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ์˜ ์œ ์‚ฌ๋„ : ", similarity)
        print("๊ฐ€์žฅ ๋น„์Šทํ•œ ์งˆ๋ฌธ์˜ ๋‹ต: ", similar_answer)

    return similar_query, similarity, similar_answer

import gradio as gr

# Gradio submit handler: answers the user's question via qna_answer_to_query.
def chat_with(message, history):
    """Answer ``message`` and return the new chat history and textbox value.

    Args:
        message: Text the user submitted.
        history: Conversation so far as a list of
            ``(user_message, bot_response)`` pairs; ``None`` is treated as
            an empty conversation.

    Returns:
        ``(updated_history, "")``. The submit event in this file lists its
        outputs as ``[chatbot, msg]``, so the first value refreshes the chat
        display and the empty string clears the input textbox.
    """
    # Work on a copy so the caller's list is never mutated in place.
    updated_history = list(history or [])

    # A blank submission has nothing to match; leave the conversation as is.
    if not message or not message.strip():
        return updated_history, ""

    # Element 2 of the lookup result is the answer text.
    response = qna_answer_to_query(message)[2]
    updated_history.append((message, response))

    # The second value goes to the textbox, so return "" rather than the
    # history list (which would be rendered into the input field).
    return updated_history, ""

# Build the Gradio chat interface.
with gr.Blocks() as demo:
    # Components, in display order: conversation view, question input box,
    # and a button that resets the conversation.
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="์งˆ๋ฌธ ์ž…๋ ฅ")
    clear = gr.Button("๋Œ€ํ™” ๊ธฐ๋ก ์ดˆ๊ธฐํ™”")

    # Submitting the textbox calls chat_with with (textbox text, chat history);
    # its two return values are written to the chat view and the textbox.
    msg.submit(
        fn=chat_with,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg],
    )

    # The reset button replaces the chat view with an empty list.
    clear.click(
        fn=lambda: [],
        inputs=None,
        outputs=chatbot,
        queue=False,
    )

# Start the app and request a public share link.
demo.launch(share=True)