# app.py
# Hosting-page residue kept for provenance (not part of the program):
#   author: fschwartzer | commit: caea1f5 ("Update app.py", verified) | size: 4.01 kB
import datetime
import html

import pandas as pd
import streamlit as st
import torch
from rapidfuzz import process, fuzz
from transformers import pipeline
# Load the CSV file. Every cell is turned into a string below before the
# table is handed to the question-answering code.
df = pd.read_csv("anomalies.csv", quotechar='"')

# Render 'real' with two decimals. Missing values are mapped to '' right
# here: formatting NaN with ":.2f" yields the literal text "nan", which the
# fillna('') below would no longer recognise as a missing value.
df['real'] = df['real'].map(lambda value: f"{value:.2f}" if pd.notna(value) else '')

# Blank out the remaining NaN cells and convert all columns to strings
df = df.fillna('').astype(str)
# Function to filter the DataFrame using RapidFuzz for dates
def filter_dataframe_by_date(df, date_str, threshold=80):
    """Return the rows of *df* whose 'ds' value fuzzily matches *date_str*.

    Args:
        df: DataFrame with a 'ds' (date) column holding strings.
        date_str: Date text to look for, e.g. "December 2022".
        threshold: Minimum ``fuzz.token_sort_ratio`` score a row must reach
            to be kept.

    Returns:
        The matching rows of *df*, in the order RapidFuzz reports the
        matches. The original index labels are preserved.
    """
    # Give RapidFuzz a plain list instead of the Series. For a Series the
    # third element of each match is the index *label*; feeding labels to
    # ``iloc`` picks the wrong rows (or raises IndexError) whenever the index
    # is not 0..n-1, e.g. on an already-filtered frame. For a list the third
    # element is the 0-based position, which is what ``iloc`` expects.
    candidates = df['ds'].tolist()
    matches = process.extract(date_str, candidates, scorer=fuzz.token_sort_ratio, limit=None)
    positions = [position for _value, score, position in matches if score >= threshold]
    return df.iloc[positions]
# Function to filter the DataFrame using RapidFuzz for groups
def filter_dataframe_by_group(df, group_keyword, threshold=80):
    """Return the rows of *df* whose 'Group' value fuzzily matches *group_keyword*.

    Args:
        df: DataFrame with a 'Group' column holding strings.
        group_keyword: Group name to look for, e.g. "IPVA".
        threshold: Minimum ``fuzz.token_sort_ratio`` score a row must reach
            to be kept.

    Returns:
        The matching rows of *df*, in the order RapidFuzz reports the
        matches. The original index labels are preserved.
    """
    # Give RapidFuzz a plain list instead of the Series. For a Series the
    # third element of each match is the index *label*; feeding labels to
    # ``iloc`` picks the wrong rows (or raises IndexError) whenever the index
    # is not 0..n-1, e.g. on an already-filtered frame. For a list the third
    # element is the 0-based position, which is what ``iloc`` expects.
    candidates = df['Group'].tolist()
    matches = process.extract(group_keyword, candidates, scorer=fuzz.token_sort_ratio, limit=None)
    positions = [position for _value, score, position in matches if score >= threshold]
    return df.iloc[positions]
# Loader for the TAPAS pipeline. Streamlit re-executes the script on every
# interaction, so the model is kept in Streamlit's resource cache instead of
# being rebuilt for each question.
@st.cache_resource
def _load_tqa_pipeline():
    """Build the TAPAS table-question-answering pipeline."""
    return pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
                    tokenizer_kwargs={"clean_up_tokenization_spaces": False})


# Function to generate a response using the TAPAS model
def response(user_question, df, date_str="December 2022", group_keyword="IPVA"):
    """Answer *user_question* against the rows of *df* for one date and group.

    Args:
        user_question: Question text, forwarded to the pipeline as ``query``.
        df: Table to search; it is narrowed with ``filter_dataframe_by_date``
            and then ``filter_dataframe_by_group``.
        date_str: Date text used for the date filter. The default keeps the
            value that used to be hard-coded; extracting it from the question
            is left to the caller.
        group_keyword: Group name used for the group filter. The default
            keeps the value that used to be hard-coded.

    Returns:
        ``{"Resposta": answer}`` where ``answer`` is the pipeline's
        ``'answer'`` field, or an ``"Error occurred: ..."`` message when the
        pipeline raised IndexError or ValueError.
    """
    started = datetime.datetime.now()

    # Filter the DataFrame by date and group. The index is reset after each
    # step so that row labels and row positions agree for whatever consumes
    # the frame next.
    # NOTE(review): TAPAS tokenization appears to address cells positionally
    # and fails with IndexError on a non-default index - confirm against the
    # installed transformers version.
    subset_df = filter_dataframe_by_date(df, date_str).reset_index(drop=True)
    subset_df = filter_dataframe_by_group(subset_df, group_keyword).reset_index(drop=True)

    tqa = _load_tqa_pipeline()

    # Debugging information
    print("Filtered DataFrame shape:", subset_df.shape)
    print("Filtered DataFrame head:\n", subset_df.head())
    print("User question:", user_question)

    # Query the TAPAS model
    try:
        answer = tqa(table=subset_df, query=user_question)['answer']
    except (IndexError, ValueError) as e:
        # ValueError is included because the filters may leave no rows.
        # NOTE(review): the pipeline is expected to reject an empty table
        # with ValueError - confirm against the installed transformers version.
        print(f"Error: {e}")
        answer = "Error occurred: " + str(e)

    query_result = {
        "Resposta": answer
    }
    print("Time taken:", datetime.datetime.now() - started)
    return query_result
# Streamlit interface: page banner made of three coloured dots (green, red,
# yellow) followed by the application title.
_BANNER_HTML = """
<div style='display: flex; align-items: center;'>
<div style='width: 40px; height: 40px; background-color: green; border-radius: 50%; margin-right: 5px;'></div>
<div style='width: 40px; height: 40px; background-color: red; border-radius: 50%; margin-right: 5px;'></div>
<div style='width: 40px; height: 40px; background-color: yellow; border-radius: 50%; margin-right: 5px;'></div>
<span style='font-size: 40px; font-weight: bold;'>Chatbot do Tesouro RS</span>
</div>
"""
# The banner is raw HTML, so Streamlit must be told not to escape it.
st.markdown(_BANNER_HTML, unsafe_allow_html=True)
# Chat history: a list of (sender_icon, message) tuples stored in the session
# state so it survives Streamlit's script reruns.
USER_ICON = '👀'
BOT_ICON = '🤖'
if 'history' not in st.session_state:
    st.session_state['history'] = []
# Last question that was answered; used to recognise reruns that carry no
# new input.
if 'last_question' not in st.session_state:
    st.session_state['last_question'] = ""

# Input box for user question
user_question = st.text_input("Escreva sua questão aqui:", "")

# Streamlit re-executes this script on every interaction while the text box
# keeps its value. Only a question that differs from the last answered one is
# processed, so pressing "Limpar" (or any other rerun) does not answer and
# append the same question again.
if user_question and user_question != st.session_state['last_question']:
    st.session_state['last_question'] = user_question
    st.session_state['history'].append((USER_ICON, user_question))
    # Generate the response
    bot_response = response(user_question, df)["Resposta"]
    st.session_state['history'].append((BOT_ICON, bot_response))

# Clear history button
if st.button("Limpar"):
    st.session_state['history'] = []

# Display chat history. This loop is the single place where messages are
# rendered, so a new exchange is not shown twice.
for sender, message in st.session_state['history']:
    if sender == USER_ICON:
        st.markdown(f"**{USER_ICON} {message}**")
    elif sender == BOT_ICON:
        # Bot messages are right-aligned through raw HTML. The text is
        # escaped because it is interpolated into markup, and <strong> is
        # used for bold since Markdown markers are not expected to be
        # interpreted inside an HTML block.
        st.markdown(
            f"<div style='text-align: right'><strong>{BOT_ICON} {html.escape(str(message))}</strong></div>",
            unsafe_allow_html=True,
        )