import streamlit as st import pandas as pd from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity import numpy as np # Load Q&A data def load_data(): return pd.read_csv('qna.csv', encoding='latin1', delimiter=';') def get_most_similar_question(new_sentence, vectorizer, tfidf_matrix, questions, answers): new_tfidf = vectorizer.transform([new_sentence]) similarities = cosine_similarity(new_tfidf, tfidf_matrix) most_similar_index = np.argmax(similarities) similarity_percentage = similarities[0, most_similar_index] * 100 return answers[most_similar_index], similarity_percentage def answer_the_question(new_sentence, vectorizer, tfidf_matrix, questions, answers): most_similar_answer, similarity_percentage = get_most_similar_question(new_sentence, vectorizer, tfidf_matrix, questions, answers) if similarity_percentage > 70: return most_similar_answer else: return 'Sorry, I am not aware of this information :(' def main(): st.markdown( "