Spaces:
Running
Running
File size: 8,433 Bytes
e2af017 96bff79 e2af017 e727bfc e69523f e727bfc 63c894a e727bfc 535c2d9 63c894a 535c2d9 b375123 f2de8aa 96bff79 e727bfc 3f82507 525bf5b c8e42cd 525bf5b c8e42cd 525bf5b c8e42cd 525bf5b 138e488 378a4bc e69523f a09ca43 378a4bc a09ca43 378a4bc a09ca43 138e488 a09ca43 138e488 a09ca43 138e488 a09ca43 138e488 a09ca43 138e488 a09ca43 138e488 a09ca43 138e488 a09ca43 138e488 a09ca43 138e488 a09ca43 138e488 378a4bc a09ca43 378a4bc c8e42cd b375123 525bf5b b375123 edfa911 b375123 edfa911 b375123 edfa911 b375123 edfa911 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
import streamlit as st
import pandas as pd
from transformers import BartForConditionalGeneration, TapexTokenizer, T5ForConditionalGeneration, T5Tokenizer
from prophet import Prophet
# Read the app's custom stylesheet. The encoding is explicit so the result does
# not depend on the host's locale.
with open("style.css", "r", encoding="utf-8") as css:
    css_style = css.read()

# Page header: the Google Fonts import, the custom stylesheet and the title bar.
# NOTE: CSS requires @import rules to come before any other style rule,
# otherwise the browser ignores them, so the font import is placed ahead of
# the injected stylesheet. The markup lines are kept flush-left on purpose:
# indented lines could be interpreted by the Markdown renderer as code.
html_content = f"""
<style>
@import url('https://fonts.googleapis.com/css2?family=Kanit:wght@700&display=swap');
{css_style}
</style>
<div style='display: flex; flex-direction: column; align-items: flex-start;'>
<div style='display: flex; align-items: center;'>
<div style='width: 20px; height: 5px; background-color: green; margin-right: 0px;'></div>
<div style='width: 20px; height: 5px; background-color: red; margin-right: 0px;'></div>
<div style='width: 20px; height: 5px; background-color: yellow; margin-right: 18px;'></div>
<span style='font-size: 38px; font-weight: normal; font-family: "Kanit", sans-serif;'>NOSTRADAMUS</span>
</div>
</div>
"""

# Render the combined style + header markup in Streamlit.
st.markdown(html_content, unsafe_allow_html=True)
# Seed the session state on first run: an empty anomalies table and an empty
# chat history. Existing values are left untouched on later reruns.
for _state_key, _make_default in (('all_anomalies', pd.DataFrame), ('history', list)):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
# Load the translation and TAPEX models.
# Streamlit re-executes this script on every user interaction, so the loading
# is wrapped in a cached function: the checkpoints are read once per server
# process and the same objects are reused by later reruns.
@st.cache_resource
def _load_models():
    """Return the models and tokenizers used by the app.

    Returns:
        A tuple ``(pt_en_translator, en_pt_translator, tapex_model,
        tapex_tokenizer, tokenizer)`` in that order.
    """
    return (
        T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-pt-en-t5"),
        T5ForConditionalGeneration.from_pretrained("unicamp-dl/translation-en-pt-t5"),
        BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq"),
        TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq"),
        T5Tokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5"),
    )


pt_en_translator, en_pt_translator, tapex_model, tapex_tokenizer, tokenizer = _load_models()
def translate(text, model, tokenizer, source_lang="pt", target_lang="en", max_new_tokens=128):
    """Translate ``text`` with a seq2seq model and its tokenizer.

    The unicamp-dl T5 translation checkpoints are documented (on their model
    cards) to be prompted with a task prefix such as
    ``"translate Portuguese to English: "``. The prefix is built from
    ``source_lang`` and ``target_lang``; when either code is not ``"pt"`` or
    ``"en"`` the text is sent to the model unchanged.

    Args:
        text: Text to translate.
        model: Object exposing ``generate(input_ids, ...)``.
        tokenizer: Object exposing ``encode(...)`` and ``decode(...)``.
        source_lang: Language code of ``text`` (``"pt"`` or ``"en"``).
        target_lang: Language code of the desired output.
        max_new_tokens: Upper bound on generated tokens, so the output is not
            cut off at the library's short default generation length.

    Returns:
        The decoded translation, with special tokens removed.
    """
    language_names = {"pt": "Portuguese", "en": "English"}
    source_name = language_names.get(source_lang)
    target_name = language_names.get(target_lang)

    if source_name and target_name:
        prompt = f"translate {source_name} to {target_name}: {text}"
    else:
        # Unknown language pair: no documented prefix to apply.
        prompt = text

    input_ids = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=True)
    outputs = model.generate(input_ids, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def response(user_question, table_data):
    """Answer a Portuguese question about ``table_data`` using TAPEX.

    The question is translated to English, answered by the TAPEX model against
    the table, and the answer is translated back to Portuguese.

    Args:
        user_question: Question text in Portuguese.
        table_data: ``pandas.DataFrame`` to query.

    Returns:
        The model's answer translated to Portuguese.
    """
    question_en = translate(user_question, pt_en_translator, tokenizer, source_lang="pt", target_lang="en")

    # TAPEX flattens the table into text, so every header and cell must be a
    # string; timestamps and floats in the anomalies table would otherwise
    # break tokenization. ``astype`` returns a new frame, so the caller's
    # table is not modified.
    table_text = table_data.astype(str)
    table_text.columns = [str(column) for column in table_text.columns]

    encoding = tapex_tokenizer(table=table_text, query=[question_en], padding=True, return_tensors="pt", truncation=True)
    outputs = tapex_model.generate(**encoding)
    response_en = tapex_tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    response_pt = translate(response_en, en_pt_translator, tokenizer, source_lang="en", target_lang="pt")
    return response_pt
def load_data(uploaded_file):
    """Read an uploaded CSV or XLSX file into a DataFrame.

    Args:
        uploaded_file: File-like object with a ``name`` attribute; the
            extension of ``name`` (case-insensitive) selects the parser.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        ValueError: If the file name ends in neither ``.csv`` nor ``.xlsx``.
    """
    # Compare in lower case so names such as "REPORT.CSV" are accepted too.
    filename = uploaded_file.name.lower()
    if filename.endswith('.csv'):
        return pd.read_csv(uploaded_file, quotechar='"', encoding='utf-8')
    if filename.endswith('.xlsx'):
        return pd.read_excel(uploaded_file)
    raise ValueError(
        f"Unsupported file type: {uploaded_file.name!r} (expected .csv or .xlsx)"
    )
def preprocess_data(df):
    """Turn the raw sheet into a table with a label column and date columns.

    The sheet is read positionally: row index 1 holds the headers, data starts
    at row index 2, and only the columns from position 9 up to (but excluding)
    the last one are kept. Missing cells are filled with 0. Headers of the form
    ``"<Mon>/<year>"`` (Portuguese month abbreviation, optionally followed by
    ``" (n)"``) become ``Timestamp`` column labels; headers that cannot be
    converted become ``NaT``. The first kept column is always named
    ``'Rotulo'``.

    Args:
        df: Raw ``pandas.DataFrame`` as loaded from the uploaded file.

    Returns:
        The cleaned DataFrame, or an empty DataFrame when ``df`` has no header
        row or no columns in the expected range.
    """
    # Without the header row (index 1) there is nothing to interpret.
    if len(df) < 2:
        return pd.DataFrame()

    new_df = df.iloc[2:, 9:-1].fillna(0)
    if new_df.shape[1] == 0:
        return pd.DataFrame()

    # Blank header cells are not strings; map them to "" so the string
    # operations below cannot fail (they end up as NaT columns).
    raw_headers = [h if isinstance(h, str) else "" for h in df.iloc[1, 9:-1]]
    # Drop the " (n)" suffix from the headers.
    headers = pd.Index(raw_headers, dtype=object).str.replace(r" \(\d+\)", "", regex=True)

    month_dict = {
        'Jan': '01', 'Fev': '02', 'Mar': '03', 'Abr': '04',
        'Mai': '05', 'Jun': '06', 'Jul': '07', 'Ago': '08',
        'Set': '09', 'Out': '10', 'Nov': '11', 'Dez': '12'
    }

    def convert_column_name(column_name):
        # The label column is left alone here; it is renamed by position below.
        if column_name == 'Rótulos de Linha':
            return column_name
        parts = column_name.split('/')
        if len(parts) < 2:
            # Not a "<month>/<year>" header: return "" so it is coerced to NaT
            # instead of raising on the missing year part.
            return ""
        month = parts[0].strip()
        # Keep only the digits of the year in case of stray characters.
        year = ''.join(filter(str.isdigit, parts[1]))
        # Unknown month abbreviations map to '00', which fails date parsing
        # and therefore also becomes NaT.
        month_number = month_dict.get(month, '00')
        return f"{month_number}/{year}"

    converted = [convert_column_name(col) for col in headers]
    parsed = pd.to_datetime(pd.Index(converted, dtype=object), errors='coerce')

    # Rename the first column by position. Renaming by label would also rename
    # every other NaT column and create duplicate 'Rotulo' columns.
    new_df.columns = ['Rotulo', *parsed[1:]]
    return new_df
def apply_prophet(df_clean):
    """Fit one Prophet model per row and collect the out-of-interval points.

    For every row of ``df_clean`` the values under the ``Timestamp`` columns
    form a time series. Points whose value is not greater than zero (or is not
    numeric) are discarded, a Prophet model with a 95% interval is fitted, and
    observations falling outside ``[yhat_lower, yhat_upper]`` are reported as
    anomalies. Intermediate tables are written to the Streamlit page.

    Args:
        df_clean: DataFrame with a ``'Rotulo'`` column naming each group and
            ``Timestamp``-labelled columns holding the values.

    Returns:
        DataFrame with columns ``ds``, ``real`` and ``group`` containing every
        anomaly found; an empty DataFrame when ``df_clean`` is empty or no
        group could be processed.
    """
    if df_clean.empty:
        st.error("DataFrame está vazio após o pré-processamento.")
        return pd.DataFrame()

    # Debugging: show the structure of df_clean.
    st.write("Estrutura do DataFrame df_clean:")
    st.write(df_clean)

    # The set of date columns is the same for every row, so compute it once.
    date_columns = [col for col in df_clean.columns if isinstance(col, pd.Timestamp)]

    # Per-group anomaly tables, concatenated once at the end.
    anomaly_frames = []

    for _, row in df_clean.iterrows():
        group_label = row['Rotulo']
        data = pd.DataFrame({
            'ds': date_columns,
            'y': row[date_columns].values
        })

        # Debugging: show the data passed into Prophet.
        st.write(f"Dados para Prophet - Grupo {group_label}:")
        st.write(data)

        # Cells can arrive as text (the columns share space with header rows),
        # so coerce to numbers before comparing; unparseable cells become NaN.
        data['y'] = pd.to_numeric(data['y'], errors='coerce')
        # Drop zero/missing points and sort by date: the real values are
        # matched to the forecast by position, and Prophet orders its history
        # chronologically.
        data = (
            data[data['y'] > 0]
            .dropna()
            .sort_values('ds')
            .reset_index(drop=True)
        )

        # Prophet needs at least two observations.
        if data.empty or len(data) < 2:
            st.write(f"Pular grupo {group_label} por não ter observações suficientes.")
            continue

        try:
            model = Prophet(interval_width=0.95)
            model.fit(data)
        except ValueError as e:
            st.write(f"Pular grupo {group_label} devido ao erro: {e}")
            continue

        # Forecast the history plus 12 future months. 'MS' (month start)
        # replaces the deprecated 'M' alias and matches the first-of-month
        # dates of the history; future rows carry no real value and therefore
        # never count as anomalies.
        future = model.make_future_dataframe(periods=12, freq='MS')
        forecast = model.predict(future)

        # Attach the observed values (None for the future rows) and flag the
        # observations outside the prediction interval.
        real_values = list(data['y']) + [None] * (len(forecast) - len(data))
        forecast['real'] = real_values
        outside_interval = (
            (forecast['real'] < forecast['yhat_lower'])
            | (forecast['real'] > forecast['yhat_upper'])
        )
        # Copy so that adding the 'group' column does not write into a slice
        # of ``forecast``.
        anomalies = forecast[outside_interval].copy()

        # Debugging: show the anomalies detected.
        st.write(f"Anomalias detectadas para o grupo {group_label}:")
        st.write(anomalies)

        anomalies['group'] = group_label
        anomaly_frames.append(anomalies[['ds', 'real', 'group']])

    if not anomaly_frames:
        return pd.DataFrame()
    return pd.concat(anomaly_frames, ignore_index=True)
tab1, tab2 = st.tabs(["Meta Prophet", "Microsoft TAPEX"])

# File upload widget.
uploaded_file = st.file_uploader("Carregue um arquivo CSV ou XLSX", type=['csv', 'xlsx'])

with tab1:
    if uploaded_file:
        df = load_data(uploaded_file)
        df_clean = preprocess_data(df)
        if df_clean.empty:
            st.warning("Não há dados válidos para processar.")
        else:
            # Run Prophet once per uploaded file. The 'all_anomalies' key
            # always exists (it is initialised to an empty table at start-up),
            # so testing for the key's presence would never trigger a run;
            # instead remember which file was processed last.
            file_key = (uploaded_file.name, uploaded_file.size)
            if st.session_state.get('processed_file') != file_key:
                with st.spinner('Aplicando modelo de série temporal...'):
                    all_anomalies = apply_prophet(df_clean)
                    st.session_state['all_anomalies'] = all_anomalies
                st.session_state['processed_file'] = file_key

with tab2:
    # Questions are only allowed once anomalies have been computed.
    if 'all_anomalies' in st.session_state and not st.session_state['all_anomalies'].empty:
        # User question input.
        user_question = st.text_input("Escreva sua questão aqui:", "")
        # The text box keeps its value across reruns, so only answer when the
        # question changed; otherwise every rerun would call the model again
        # and append duplicate entries to the history.
        if user_question and user_question != st.session_state.get('last_question'):
            bot_response = response(user_question, st.session_state['all_anomalies'])
            st.session_state['history'].append(('👤', user_question))
            st.session_state['history'].append(('🤖', bot_response))
            st.session_state['last_question'] = user_question

        # Reserve the history area above the button, but fill it only after
        # the button was handled so that clearing takes effect immediately.
        history_area = st.container()

        # Button to clear the history.
        if st.button("Limpar histórico"):
            st.session_state['history'] = []

        # Show the conversation history.
        with history_area:
            for sender, message in st.session_state['history']:
                if sender == '👤':
                    st.markdown(f"**👤 {message}**")
                elif sender == '🤖':
                    st.markdown(f"**🤖 {message}**", unsafe_allow_html=True)
    else:
        st.warning("Por favor, processe os dados no Meta Prophet primeiro.")
|