|
import streamlit as st |
|
import pandas as pd |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
import numpy as np |
|
|
|
|
|
# ``st.cache`` is the legacy caching decorator, deprecated since Streamlit 1.18
# in favor of ``st.cache_data``. Prefer the newer decorator when this Streamlit
# build provides it, and fall back to the legacy one on older installations.
_cache_data = getattr(st, "cache_data", None) or st.cache


@_cache_data
def load_data():
    """Read the question/answer table from ``qna.csv``.

    The file is resolved relative to the current working directory and is
    parsed as UTF-8 text with ``;`` as the field delimiter. The call is
    wrapped in Streamlit's data cache so that script reruns reuse the parsed
    table rather than reading the file again.

    Returns:
        pandas.DataFrame: the parsed contents of ``qna.csv``.
    """
    return pd.read_csv('qna.csv', encoding='utf-8', delimiter=';')
|
|
|
def get_most_similar_question(new_sentence, vectorizer, tfidf_matrix, questions, answers):
    """Find the stored answer whose question best matches ``new_sentence``.

    Args:
        new_sentence: Text to match against the known questions.
        vectorizer: Fitted vectorizer; its ``transform`` is called on a
            one-element list containing ``new_sentence``.
        tfidf_matrix: Vectorized known questions that the new sentence is
            compared with.
        questions: Not used by this function; kept so the signature matches
            its callers.
        answers: Sequence indexed by the position of the best match.

    Returns:
        tuple: ``(answer, similarity_percentage)`` where the percentage is
        the highest cosine similarity multiplied by 100.
    """
    query_vector = vectorizer.transform([new_sentence])

    # A single query row goes in, so the result has exactly one row holding
    # one score per known question.
    scores = cosine_similarity(query_vector, tfidf_matrix)
    best_index = np.argmax(scores)
    best_score_percent = scores[0, best_index] * 100

    return answers[best_index], best_score_percent
|
|
|
def answer_the_question(new_sentence, vectorizer, tfidf_matrix, questions, answers):
    """Return the best-matching answer, or an apology when the match is weak.

    All arguments are forwarded unchanged to ``get_most_similar_question``.

    Returns:
        The answer of the closest known question when its similarity is
        strictly above 70 percent; otherwise a fixed fallback message.
    """
    best_answer, score_percent = get_most_similar_question(
        new_sentence, vectorizer, tfidf_matrix, questions, answers
    )

    # Only matches scoring strictly above 70% are trusted enough to be shown.
    return (
        best_answer
        if score_percent > 70
        else 'Sorry, I am not aware of this information :('
    )
|
|
|
def main():
    """Render the chatbot page and answer the question the user submits.

    Loads the Q&A table, fits a TF-IDF vectorizer on its ``question`` column,
    shows a text input with a "Submit" button, and writes either the matched
    answer or a warning when the input is empty.
    """
    st.title("Q&A Chatbot")

    qna_table = load_data()
    questions = qna_table['question'].tolist()
    answers = qna_table['answer'].tolist()

    # Fit on the known questions so user input can later be projected into
    # the same vector space.
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(questions)

    user_question = st.text_input("Ask me a question:")

    # Nothing more to do until the user presses the button.
    if not st.button("Submit"):
        return

    if not user_question:
        st.warning("Please ask a question.")
        return

    # Compute the reply before writing anything so a failure leaves no
    # dangling "Answer:" label on the page.
    response = answer_the_question(
        user_question, vectorizer, tfidf_matrix, questions, answers
    )
    st.write("Answer:")
    st.write(response)
|
|
|
# Script entry point: build the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|