Spaces:
Configuration error
Configuration error
from modules import tables | |
from google_tools import trends as gtrends | |
import pandas as pd | |
import numpy as np | |
from datetime import timedelta, date | |
from statsmodels.tsa.seasonal import seasonal_decompose | |
import plotly.graph_objects as go | |
from plotly.subplots import make_subplots | |
import streamlit as st | |
import io | |
import boto3 | |
import openpyxl | |
key ='AKIARYMZ4J2YQDB66VX4' | |
secret_key = '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' | |
bucket='portallvam' | |
path ='Momentum.xlsx' | |
def save_s3(key, secret_key, bucket, df, path): | |
with io.BytesIO() as output: | |
with pd.ExcelWriter(output, engine='xlsxwriter') as writer: | |
for industry in df.keys(): | |
df[industry].to_excel(writer, sheet_name=industry) | |
data = output.getvalue() | |
s3 = boto3.resource('s3', aws_access_key_id=key, aws_secret_access_key=secret_key) | |
s3.Bucket(bucket).put_object(Key=path, Body=data) | |
def read_excel_s3(key, secret_key, bucket, path): | |
s3_client = boto3.client('s3', aws_access_key_id=key, aws_secret_access_key=secret_key) | |
response = s3_client.get_object(Bucket=bucket, Key=path) | |
data = response["Body"].read() | |
df = pd.read_excel(io.BytesIO(data), sheet_name=None, index_col='Unnamed: 0.1') | |
return df | |
def generar_excel(ruta_guardado, Pestanas, Data): | |
wb = openpyxl.Workbook() | |
writer = pd.ExcelWriter(ruta_guardado) | |
for pestana in Pestanas: | |
wb.create_sheet(pestana) | |
std = wb.get_sheet_by_name('Sheet') | |
wb.remove_sheet(std) | |
wb.save(ruta_guardado) | |
for i, pestana in enumerate(Pestanas): | |
if pestana=='Real Estate Management & Development-CL': | |
pestana = 'Real Estate-CL' | |
Data['Real Estate Management & Development-CL'].to_excel(writer, sheet_name=pestana) | |
elif pestana=='Real Estate Management & Development-BR': | |
pestana = 'Real Estate-BR' | |
Data['Real Estate Management & Development-BR'].to_excel(writer, sheet_name=pestana) | |
else: | |
Data[pestana].to_excel(writer, sheet_name=Pestanas[i]) | |
writer.save() | |
def colores_corporativos(colors=None): | |
color_dict = {'red': (204, 0, 51), | |
'light_blue': (110, 162, 201), | |
'light_gray': (135, 146, 158), | |
'grey': (105, 105, 105), | |
'yellow': (195, 195, 9), | |
'dark_purple': (119, 28, 95), | |
'blue': (42, 83, 113), | |
'purple': (159, 37, 127), | |
'light_yellow': (252, 252, 196), | |
'light_green': (122, 178, 153), | |
'gray': (66, 74, 82)} | |
for key in color_dict: | |
color_dict[key] = tuple(v/255 for v in color_dict[key]) | |
if colors is None: | |
return color_dict | |
else: | |
aux = {col: color_dict[col] for col in colors} | |
return aux | |
corp_colors = list(colores_corporativos().values()) | |
colors2 = [] | |
for i in range(len(corp_colors)): | |
colors2.append("rgb" + str(corp_colors[i])) | |
company_db = pd.read_excel('Data/Company_Base_Definitivo.xlsx', sheet_name='Compilado') | |
id_to_ticker = {str(row['ID_Quant']): str(row['Ticker Bloomberg']).split()[0] for i, row in company_db.iterrows()} | |
countries_dict = {'BR': 'Brazil', 'CL': 'Chile', 'US': 'Brazil', | |
'US-Disease': 'Brazil'} | |
def data_request(countries, start, currency='USD'): | |
close_price = {'Brazil': [], | |
'Chile': []} | |
market_cap = {'Brazil': [], | |
'Chile': []} | |
for c in countries: | |
close_price[c] = tables.EquityMaster(field='IQ_CLOSEPRICE_ADJ', currency=currency, country=c).query( | |
rename=['asset'], start=start, expand=True) | |
market_cap[c] = tables.EquityMaster(field='IQ_MARKETCAP', currency=currency, country=c).query( | |
start=start, rename=['asset'], expand=True) | |
close_price[c] = close_price[c].loc[:, close_price[c].columns.isin(id_to_ticker.keys())] | |
close_price[c].columns = [id_to_ticker[col] for col in close_price[c].columns] | |
market_cap[c] = market_cap[c].loc[:, market_cap[c].columns.isin(id_to_ticker.keys())] | |
market_cap[c].columns = [id_to_ticker[col] for col in market_cap[c].columns] | |
return [close_price, market_cap] | |
def trends_request(keywords, today): | |
trends_frames_dict = {} | |
for sector, values in keywords.items(): | |
if not (sector in ['Restaurantes']): | |
trends_frames_dict[sector] = {} | |
print('Buscando para ' + sector) | |
for country_name in values.columns: | |
words = values[country_name].dropna() | |
if '-' in country_name: | |
fixed_country_name = country_name.split('-')[0].strip() | |
else: | |
fixed_country_name = country_name | |
words_index = pd.DataFrame() | |
for word in words: | |
new_data = gtrends.keyword_trend(word, fixed_country_name, end_date=today) | |
if new_data is not None: | |
words_index = pd.concat([words_index, | |
new_data], | |
axis=1) | |
else: | |
print('No se encuentra data para ' + word) | |
trends_frames_dict[sector][country_name] = words_index | |
trends_frames_dict[sector][country_name].index.name = None | |
return trends_frames_dict | |
def trends_frames_excel(dicc): | |
sheets_cl = [] | |
sheets_br = [] | |
for key_1 in dicc.keys(): | |
for key_2 in dicc[key_1].keys(): | |
if key_2=='CL': | |
sheets_cl .append(key_1 + '-' + key_2) | |
else: | |
sheets_br.append(key_1 + '-' + key_2) | |
trends_frames_dict_cl = {} | |
trends_frames_dict_br = {} | |
for key_1 in dicc.keys(): | |
for key_2 in dicc[key_1].keys(): | |
if key_2=='CL': | |
trends_frames_dict_cl[key_1 + '-'+ key_2] = dicc[key_1][key_2] | |
elif key_2=='BR': | |
trends_frames_dict_br[key_1 + '-' + key_2] = dicc[key_1][key_2] | |
elif key_2=='US' or key_2=='US-Disease': | |
trends_frames_dict_br[key_1 + '-' + key_2] = dicc[key_1][key_2] | |
generar_excel('Data/GT_CL.xlsx', sheets_cl, trends_frames_dict_cl) | |
df_cl = pd.read_excel('Data/GT_CL.xlsx', sheet_name=None) | |
st.write(df_cl) | |
save_s3(key=key, secret_key=secret_key, bucket=bucket, df=df_cl, path='GT_CL.xlsx') | |
generar_excel('Data/GT_BR.xlsx', sheets_br, trends_frames_dict_br) | |
df_br = pd.read_excel('Data/GT_BR.xlsx', sheet_name=None) | |
save_s3(key=key, secret_key=secret_key, bucket=bucket, df=df_br, path='GT_BR.xlsx') | |
def read_trends_frames(country): | |
if country=='CL': | |
return read_excel_s3(key=key, secret_key=secret_key, bucket=bucket, path='GT_CL.xlsx') | |
elif country=='BR': | |
return read_excel_s3(key=key, secret_key=secret_key, bucket=bucket, path='GT_BR.xlsx') | |
def report(): | |
form = st.form('Report') | |
start_date = str(date.today() - timedelta(5 * 365)) | |
select_countries = form.multiselect('驴Qu茅 pa铆s(es) desea visualizar?', ['Todos', 'Chile', 'Brazil']) | |
if 'Todos' in select_countries: | |
select_countries = ['Chile', 'Brazil'] | |
update_data = form.form_submit_button("Actualizar Datos") | |
accept = form.form_submit_button('Visualizar') | |
col1, col2 = st.columns(2) | |
if update_data: | |
xls = pd.ExcelFile('Data/keywords_definitivas_mongo.xlsx') | |
industry_filter = ['Pesca', 'Agricola', 'Financials-RP'] | |
keywords_dict = {sheet: xls.parse(sheet) for sheet in xls.sheet_names | |
if sheet not in industry_filter} | |
xls.close() | |
del xls | |
# Arreglamos una llave porque una hoja de excel alcanza el m谩ximo de caracteres posible para un nombre. | |
new_key = "Real Estate Management & Development" | |
old_key = "Real Estate Management & Develo" | |
keywords_dict[new_key] = keywords_dict.pop(old_key) | |
trends_dict = trends_request(keywords_dict, date.today()) | |
trends_frames_excel(trends_dict) | |
ud = pd.read_excel('Data/update_data.xlsx') | |
ud = ud[ud['View'] != 'Google Trends'] | |
today = date.today().strftime('%d-%m-%Y') | |
ud = ud.append({"View": 'Google Trends', | |
"Last_Update": today}, ignore_index=True) | |
ud.to_excel('Data/update_data.xlsx', index=False) | |
if accept: | |
close_price_dict, market_cap_dict = data_request(select_countries, start_date) | |
ew_index = {} | |
mw_index = {} | |
country_index = {} | |
if select_countries == ['Brazil']: | |
dates = {'Brazil': sorted(list(set(market_cap_dict['Brazil'].index) | |
.union(set(close_price_dict['Brazil'].index))))} | |
elif select_countries == ['Chile']: | |
dates = {'Chile': sorted(list(set(market_cap_dict['Chile'].index) | |
.union(set(close_price_dict['Chile'].index))))} | |
else: | |
dates = {'Brazil': sorted(list(set(market_cap_dict['Brazil'].index) | |
.union(set(close_price_dict['Brazil'].index)))), | |
'Chile': sorted(list(set(market_cap_dict['Chile'].index) | |
.union(set(close_price_dict['Chile'].index))))} | |
for country in select_countries: | |
mkt = market_cap_dict[country] | |
cp = close_price_dict[country] | |
w = mkt.div(mkt.sum(1).values, axis=0) | |
rets = cp.pct_change() | |
country_index[country] = pd.DataFrame({'MW': (w * rets).sum(1), | |
'EW': rets.mean(1)}).fillna(0) | |
industries_1 = np.unique(company_db[['LV1']].values) | |
industries_2 = np.unique(company_db[['LV2']].values) | |
industries = np.unique(np.concatenate([industries_1, industries_2])) | |
df_mw_index = pd.DataFrame(columns=industries, index=dates[country]) | |
df_ew_index = pd.DataFrame(columns=industries, index=dates[country]) | |
for industry in industries: | |
industry = str(industry) | |
mc = mkt.loc[:, mkt.columns.isin(company_db[company_db['LV1'] == industry]['Ticker'])] | |
prices = cp.loc[:, cp.columns.isin(company_db[company_db['LV1'] == industry]['Ticker'])] | |
w = mc.div(mc.sum(1).values, axis=0) | |
rets = prices.pct_change() | |
df_mw_index[industry] = (w * rets).sum(1) | |
df_ew_index[industry] = rets.mean(1) | |
mw_index[country] = df_mw_index.fillna(0) | |
ew_index[country] = df_ew_index.fillna(0) | |
xls = pd.ExcelFile('Data/keywords_definitivas_mongo.xlsx') | |
industry_filter = ['Pesca', 'Agricola', 'Financials-RP', 'Agriculture'] | |
keywords_dict = {sheet: xls.parse(sheet) for sheet in xls.sheet_names | |
if sheet not in industry_filter} | |
xls.close() | |
del xls | |
new_key = "Real Estate Management & Development" | |
old_key = "Real Estate Management & Develo" | |
keywords_dict[new_key] = keywords_dict.pop(old_key) | |
trends_frames = {} | |
trends_frames_cl = read_trends_frames('CL') | |
trends_frames_br = read_trends_frames('BR') | |
for key_cl in trends_frames_cl.keys(): | |
trends_frames_cl[key_cl] = trends_frames_cl[key_cl].drop(columns='Unnamed: 0') | |
for key_br in trends_frames_br.keys(): | |
trends_frames_br[key_br] = trends_frames_br[key_br].drop(columns='Unnamed: 0') | |
for industry in keywords_dict.keys(): | |
if not industry=='Restaurantes': | |
countries_in_industry = keywords_dict[industry].columns | |
trends_frames[industry] = {} | |
for c in countries_in_industry: | |
if c=='CL': | |
if industry == 'Real Estate Management & Development': | |
index = trends_frames_cl['Real Estate-CL'].index | |
trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_cl['Real Estate-CL'].columns, | |
index=index) | |
else: | |
index = trends_frames_cl[industry+'-CL'].index | |
trends_frames_cl[industry+'-CL'] = trends_frames_cl[industry+'-CL'].loc[:, trends_frames_cl[industry+'-CL'].columns.notnull()] | |
trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_cl[industry+'-CL'].columns, | |
index=index) | |
elif c=='BR': | |
if industry == 'Real Estate Management & Development': | |
index = trends_frames_br['Real Estate-BR'].index | |
trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_br['Real Estate-BR'].columns, | |
index=index) | |
else: | |
index = trends_frames_br[industry + '-BR'].index | |
trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_br[industry+'-BR'].columns, | |
index=index) | |
if 'CL' in countries_in_industry: | |
if industry == 'Real Estate Management & Development': | |
for col_cl in trends_frames_cl['Real Estate-CL'].columns: | |
if col_cl in keywords_dict[industry]['CL'].values: | |
trends_frames[industry]['CL'][col_cl] = trends_frames_cl['Real Estate-CL'][col_cl].dropna() | |
else: | |
for col_cl in trends_frames_cl[industry+'-CL'].columns: | |
if col_cl in keywords_dict[industry]['CL'].values: | |
trends_frames[industry]['CL'][col_cl] = trends_frames_cl[industry+'-CL'][col_cl].dropna() | |
if 'BR' in countries_in_industry: | |
if industry == 'Real Estate Management & Development': | |
for col_br in trends_frames_br['Real Estate-BR'].columns: | |
if col_br in keywords_dict[industry]['BR'].values: | |
trends_frames[industry]['BR'][col_br] = trends_frames_br['Real Estate-BR'][col_br].dropna() | |
else: | |
for col_br in trends_frames_br[industry+'-BR'].columns: | |
if col_br in keywords_dict[industry]['BR'].values: | |
trends_frames[industry]['BR'][col_br] = trends_frames_br[industry+'-BR'][col_br].dropna() | |
deseason = True | |
n_words = 5 | |
for industry in keywords_dict.keys(): | |
if not industry == 'Restaurantes': | |
countries_in_industry = keywords_dict[industry].columns | |
for c in countries_in_industry: | |
trends_frames[industry][c] = trends_frames[industry][c].loc[:, trends_frames[industry][c].columns.notnull()] | |
summary = pd.DataFrame() | |
fig1 = make_subplots(rows=2, cols=1, | |
subplot_titles=['Cambio Semanal', 'Cambio 1 Mes', 'Cambio 3 Meses', 'Cambio YTD'], | |
horizontal_spacing=0.6, ) | |
fig2 = make_subplots(rows=2, cols=1, | |
subplot_titles=['Cambio Semanal', 'Cambio 1 Mes', 'Cambio 3 Meses', 'Cambio YTD'], | |
horizontal_spacing=0.6) | |
for industry, dict_ in trends_frames.items(): | |
for country, df_ in dict_.items(): | |
if deseason: | |
df_ = pd.DataFrame({col: df_[col] - | |
seasonal_decompose(df_[col], period=8).seasonal | |
for col in df_.columns}) | |
summary[f'{industry}-{country}'] = df_.mean(1) | |
summary = (summary - summary.mean()) / summary.std() | |
delta_w = summary.diff(1).iloc[-1].sort_values(ascending=True) | |
delta_m = summary.diff(4).iloc[-1].sort_values(ascending=True) | |
delta_3m = summary.diff(12).iloc[-1].sort_values(ascending=True) | |
delta_ytd = summary.resample('Y').last().diff().iloc[-1].sort_values(ascending=True) | |
fig1.add_trace(go.Bar(x=delta_w.array, y=delta_w.index, orientation='h', marker_color=colors2[2], | |
showlegend=False), row=1, col=1) | |
fig2.add_trace(go.Bar(x=delta_m.array, y=delta_m.index, orientation='h', marker_color=colors2[2], | |
showlegend=False), row=1, col=1) | |
fig1.add_trace(go.Bar(x=delta_3m.array, y=delta_3m.index, orientation='h', marker_color=colors2[2], | |
showlegend=False), row=2, col=1) | |
fig2.add_trace(go.Bar(x=delta_ytd.array, y=delta_ytd.index, orientation='h', marker_color=colors2[2], | |
showlegend=False), row=2, col=1) | |
fig1.update_layout(title_text='Cambios en las B煤squedas', margin_b=0, margin_t=50, margin_r=0, margin_l=0) | |
fig2.update_layout(margin_b=0, margin_t=50, margin_r=0, margin_l=0) | |
col1.plotly_chart(fig1, use_container_width=True) | |
col2.plotly_chart(fig2, use_container_width=True) | |
for industry in trends_frames: | |
for i, (country, data) in enumerate(trends_frames[industry].items()): | |
if countries_dict[country] in select_countries: | |
fig_indices1 = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}], | |
[{"secondary_y": False}]], | |
subplot_titles=['GT (zscore) vs Spread Hist贸rico', | |
'Variaci贸n YoY GT Hist贸rico'], | |
horizontal_spacing=0.) | |
fig_indices2 = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}], | |
[{"secondary_y": False}]], | |
subplot_titles=['GT (zscore) vs Spread 脷ltimo A帽o', | |
'Variaci贸n YoY GT 脷ltimo a帽o'], | |
horizontal_spacing=0.3) | |
aux_df = summary[f'{industry}-{country}'] | |
aux_df.index.name = '' | |
mm_year = aux_df.rolling(52).mean() | |
mm_half = aux_df.rolling(26).mean() | |
mm_quarter = aux_df.rolling(13).mean() | |
mm_month = aux_df.rolling(4).mean() | |
mm = pd.concat([mm_year, mm_half, mm_quarter, mm_month], axis=1) | |
mm.columns = ['1Y', '6M', '3M', '1M'] | |
if deseason: | |
p = '3M' | |
else: | |
p = '1Y' | |
fig_indices1.add_trace(go.Scatter(x=mm[p].index, y=mm[p].array, line=dict(color=colors2[0]), | |
showlegend=True, name=f'{p} MM GT Index'), | |
secondary_y=False, row=1, col=1) | |
fig_indices1.update_layout(title_text=f'{industry} - {country}') | |
fig_indices2.add_trace( | |
go.Scatter(x=mm[p].iloc[-52:].index, y=mm[p].iloc[-52:].array, line=dict(color=colors2[0]), | |
showlegend=False, name=f'{p} MM GT Index'), | |
secondary_y=False, row=1, col=1) | |
mm_4w = data.mean(1).rolling(4).mean() | |
yoy = mm_4w.pct_change(52) | |
aux2 = pd.concat([yoy], axis=1) | |
aux2.columns = ['YoY'] | |
aux2.index.name = '' | |
fig_indices1.add_trace(go.Bar(x=aux2.dropna().index, y=aux2.dropna()['YoY'], | |
marker_color=colors2[1], showlegend=False), row=2, col=1) | |
fig_indices2.add_trace(go.Bar(x=aux2.dropna().iloc[-52:].index, y=aux2.dropna()['YoY'].iloc[-52:].array, | |
marker_color=colors2[1], showlegend=False), row=2, col=1) | |
if country == 'US' and industry == 'Pesca': | |
country_ = 'Chile' | |
else: | |
country_ = countries_dict[country] | |
spread_mw = (mw_index[country_][industry].rolling(52).apply(lambda x: (1 + x).prod()) - | |
country_index[country_]['MW'].rolling(52).apply(lambda x: (1 + x).prod())) | |
spread_ew = (ew_index[country_][industry].rolling(52).apply(lambda x: (1 + x).prod()) - | |
country_index[country_]['EW'].rolling(52).apply(lambda x: (1 + x).prod())) | |
spread = pd.DataFrame({'EW': spread_ew, 'MW': spread_mw}) | |
fig_indices1.add_trace(go.Scatter(x=spread['MW'].dropna().index, y=spread['MW'].dropna().array, | |
name='Spread MW', line=dict(color=colors2[3])), | |
secondary_y=True, row=1, col=1) | |
fig_indices2.add_trace( | |
go.Scatter(x=spread['MW'].iloc[-260:].dropna().index, y=spread['MW'].iloc[-260:].dropna().array, | |
name='Spread MW', line=dict(color=colors2[3])), | |
secondary_y=True, row=1, col=1) | |
fig_indices1.update_xaxes(showticklabels=False) | |
fig_indices2.update_xaxes(showticklabels=False) | |
fig_indices1.layout.update(xaxis_rangeslider_visible=False, margin_b=20, | |
margin_r=20, margin_l=20, | |
legend=dict(orientation="h", | |
yanchor="top", | |
y=0.6, | |
xanchor="right", | |
x=1)) | |
fig_indices2.layout.update(xaxis_rangeslider_visible=False, | |
margin_b=20, | |
margin_r=20, margin_l=20, | |
legend=dict(orientation="h", | |
yanchor="top", | |
y=0.6, | |
xanchor="right", | |
x=1)) | |
fig_indices1.update_xaxes(showticklabels=True, row=2, | |
col=1) | |
fig_indices2.update_xaxes(showticklabels=True, row=2, | |
col=1) | |
fig_indices1.update_yaxes(tickformat=',.0%', row=2, col=1) | |
fig_indices2.update_yaxes(tickformat=',.0%', row=2, col=1) | |
if deseason: | |
df1 = pd.DataFrame({col: data[col] - | |
seasonal_decompose(data[col]).seasonal | |
for col in data.columns}) | |
else: | |
df1 = data | |
# Top word's table plot | |
last_week = df1.iloc[-1].sort_values(ascending=False)[:n_words] / 100 | |
all_time = df1.mean().sort_values(ascending=False)[:n_words] / 100 | |
fig_W = make_subplots(subplot_titles=['Top Words en ' + f'{industry} - {country}']) | |
table = pd.concat([pd.Series(last_week.index), | |
pd.Series(all_time.index)], axis=1) | |
table.columns = ['Top 1W', 'Top 5Y'] | |
fig_W.add_trace(go.Table(header=dict(values=table.columns), | |
cells=dict(values=[table['Top 1W'].values, table['Top 5Y'].values]))) | |
fig_W.update_layout(margin_b=0, margin_t=50, | |
margin_r=0, margin_l=0, | |
height=200) | |
fig_indices1.update_layout(margin_b=0, margin_t=50, | |
margin_r=0, margin_l=0, | |
height=600) | |
fig_indices2.update_layout(margin_b=0, margin_t=50, | |
margin_r=0, margin_l=0, | |
height=600) | |
col1.plotly_chart(fig_indices1, use_container_width=True) | |
col2.plotly_chart(fig_indices2, use_container_width=True) | |
fig_W.update_layout(margin_b=0, margin_t=30, margin_r=10, margin_l=0) | |
st.plotly_chart(fig_W, use_container_width=True) | |