from modules import tables from google_tools import trends as gtrends import pandas as pd import numpy as np from datetime import timedelta, date from statsmodels.tsa.seasonal import seasonal_decompose import plotly.graph_objects as go from plotly.subplots import make_subplots import streamlit as st import io import boto3 import openpyxl key ='AKIARYMZ4J2YQDB66VX4' secret_key = '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' bucket='portallvam' path ='Momentum.xlsx' def save_s3(key, secret_key, bucket, df, path): with io.BytesIO() as output: with pd.ExcelWriter(output, engine='xlsxwriter') as writer: for industry in df.keys(): df[industry].to_excel(writer, sheet_name=industry) data = output.getvalue() s3 = boto3.resource('s3', aws_access_key_id=key, aws_secret_access_key=secret_key) s3.Bucket(bucket).put_object(Key=path, Body=data) def read_excel_s3(key, secret_key, bucket, path): s3_client = boto3.client('s3', aws_access_key_id=key, aws_secret_access_key=secret_key) response = s3_client.get_object(Bucket=bucket, Key=path) data = response["Body"].read() df = pd.read_excel(io.BytesIO(data), sheet_name=None, index_col='Unnamed: 0.1') return df def generar_excel(ruta_guardado, Pestanas, Data): wb = openpyxl.Workbook() writer = pd.ExcelWriter(ruta_guardado) for pestana in Pestanas: wb.create_sheet(pestana) std = wb.get_sheet_by_name('Sheet') wb.remove_sheet(std) wb.save(ruta_guardado) for i, pestana in enumerate(Pestanas): if pestana=='Real Estate Management & Development-CL': pestana = 'Real Estate-CL' Data['Real Estate Management & Development-CL'].to_excel(writer, sheet_name=pestana) elif pestana=='Real Estate Management & Development-BR': pestana = 'Real Estate-BR' Data['Real Estate Management & Development-BR'].to_excel(writer, sheet_name=pestana) else: Data[pestana].to_excel(writer, sheet_name=Pestanas[i]) writer.save() def colores_corporativos(colors=None): color_dict = {'red': (204, 0, 51), 'light_blue': (110, 162, 201), 'light_gray': (135, 146, 158), 'grey': (105, 105, 105), 'yellow': (195, 195, 9), 'dark_purple': (119, 28, 95), 'blue': (42, 83, 113), 'purple': (159, 37, 127), 'light_yellow': (252, 252, 196), 'light_green': (122, 178, 153), 'gray': (66, 74, 82)} for key in color_dict: color_dict[key] = tuple(v/255 for v in color_dict[key]) if colors is None: return color_dict else: aux = {col: color_dict[col] for col in colors} return aux corp_colors = list(colores_corporativos().values()) colors2 = [] for i in range(len(corp_colors)): colors2.append("rgb" + str(corp_colors[i])) company_db = pd.read_excel('Data/Company_Base_Definitivo.xlsx', sheet_name='Compilado') id_to_ticker = {str(row['ID_Quant']): str(row['Ticker Bloomberg']).split()[0] for i, row in company_db.iterrows()} countries_dict = {'BR': 'Brazil', 'CL': 'Chile', 'US': 'Brazil', 'US-Disease': 'Brazil'} @st.cache(suppress_st_warning=True) def data_request(countries, start, currency='USD'): close_price = {'Brazil': [], 'Chile': []} market_cap = {'Brazil': [], 'Chile': []} for c in countries: close_price[c] = tables.EquityMaster(field='IQ_CLOSEPRICE_ADJ', currency=currency, country=c).query( rename=['asset'], start=start, expand=True) market_cap[c] = tables.EquityMaster(field='IQ_MARKETCAP', currency=currency, country=c).query( start=start, rename=['asset'], expand=True) close_price[c] = close_price[c].loc[:, close_price[c].columns.isin(id_to_ticker.keys())] close_price[c].columns = [id_to_ticker[col] for col in close_price[c].columns] market_cap[c] = market_cap[c].loc[:, market_cap[c].columns.isin(id_to_ticker.keys())] market_cap[c].columns = [id_to_ticker[col] for col in market_cap[c].columns] return [close_price, market_cap] @st.cache(suppress_st_warning=True) def trends_request(keywords, today): trends_frames_dict = {} for sector, values in keywords.items(): if not (sector in ['Restaurantes']): trends_frames_dict[sector] = {} print('Buscando para ' + sector) for country_name in values.columns: words = values[country_name].dropna() if '-' in country_name: fixed_country_name = country_name.split('-')[0].strip() else: fixed_country_name = country_name words_index = pd.DataFrame() for word in words: new_data = gtrends.keyword_trend(word, fixed_country_name, end_date=today) if new_data is not None: words_index = pd.concat([words_index, new_data], axis=1) else: print('No se encuentra data para ' + word) trends_frames_dict[sector][country_name] = words_index trends_frames_dict[sector][country_name].index.name = None return trends_frames_dict def trends_frames_excel(dicc): sheets_cl = [] sheets_br = [] for key_1 in dicc.keys(): for key_2 in dicc[key_1].keys(): if key_2=='CL': sheets_cl .append(key_1 + '-' + key_2) else: sheets_br.append(key_1 + '-' + key_2) trends_frames_dict_cl = {} trends_frames_dict_br = {} for key_1 in dicc.keys(): for key_2 in dicc[key_1].keys(): if key_2=='CL': trends_frames_dict_cl[key_1 + '-'+ key_2] = dicc[key_1][key_2] elif key_2=='BR': trends_frames_dict_br[key_1 + '-' + key_2] = dicc[key_1][key_2] elif key_2=='US' or key_2=='US-Disease': trends_frames_dict_br[key_1 + '-' + key_2] = dicc[key_1][key_2] generar_excel('Data/GT_CL.xlsx', sheets_cl, trends_frames_dict_cl) df_cl = pd.read_excel('Data/GT_CL.xlsx', sheet_name=None) st.write(df_cl) save_s3(key=key, secret_key=secret_key, bucket=bucket, df=df_cl, path='GT_CL.xlsx') generar_excel('Data/GT_BR.xlsx', sheets_br, trends_frames_dict_br) df_br = pd.read_excel('Data/GT_BR.xlsx', sheet_name=None) save_s3(key=key, secret_key=secret_key, bucket=bucket, df=df_br, path='GT_BR.xlsx') def read_trends_frames(country): if country=='CL': return read_excel_s3(key=key, secret_key=secret_key, bucket=bucket, path='GT_CL.xlsx') elif country=='BR': return read_excel_s3(key=key, secret_key=secret_key, bucket=bucket, path='GT_BR.xlsx') def report(): form = st.form('Report') start_date = str(date.today() - timedelta(5 * 365)) select_countries = form.multiselect('¿Qué país(es) desea visualizar?', ['Todos', 'Chile', 'Brazil']) if 'Todos' in select_countries: select_countries = ['Chile', 'Brazil'] update_data = form.form_submit_button("Actualizar Datos") accept = form.form_submit_button('Visualizar') col1, col2 = st.columns(2) if update_data: xls = pd.ExcelFile('Data/keywords_definitivas_mongo.xlsx') industry_filter = ['Pesca', 'Agricola', 'Financials-RP'] keywords_dict = {sheet: xls.parse(sheet) for sheet in xls.sheet_names if sheet not in industry_filter} xls.close() del xls # Arreglamos una llave porque una hoja de excel alcanza el máximo de caracteres posible para un nombre. new_key = "Real Estate Management & Development" old_key = "Real Estate Management & Develo" keywords_dict[new_key] = keywords_dict.pop(old_key) trends_dict = trends_request(keywords_dict, date.today()) trends_frames_excel(trends_dict) ud = pd.read_excel('Data/update_data.xlsx') ud = ud[ud['View'] != 'Google Trends'] today = date.today().strftime('%d-%m-%Y') ud = ud.append({"View": 'Google Trends', "Last_Update": today}, ignore_index=True) ud.to_excel('Data/update_data.xlsx', index=False) if accept: close_price_dict, market_cap_dict = data_request(select_countries, start_date) ew_index = {} mw_index = {} country_index = {} if select_countries == ['Brazil']: dates = {'Brazil': sorted(list(set(market_cap_dict['Brazil'].index) .union(set(close_price_dict['Brazil'].index))))} elif select_countries == ['Chile']: dates = {'Chile': sorted(list(set(market_cap_dict['Chile'].index) .union(set(close_price_dict['Chile'].index))))} else: dates = {'Brazil': sorted(list(set(market_cap_dict['Brazil'].index) .union(set(close_price_dict['Brazil'].index)))), 'Chile': sorted(list(set(market_cap_dict['Chile'].index) .union(set(close_price_dict['Chile'].index))))} for country in select_countries: mkt = market_cap_dict[country] cp = close_price_dict[country] w = mkt.div(mkt.sum(1).values, axis=0) rets = cp.pct_change() country_index[country] = pd.DataFrame({'MW': (w * rets).sum(1), 'EW': rets.mean(1)}).fillna(0) industries_1 = np.unique(company_db[['LV1']].values) industries_2 = np.unique(company_db[['LV2']].values) industries = np.unique(np.concatenate([industries_1, industries_2])) df_mw_index = pd.DataFrame(columns=industries, index=dates[country]) df_ew_index = pd.DataFrame(columns=industries, index=dates[country]) for industry in industries: industry = str(industry) mc = mkt.loc[:, mkt.columns.isin(company_db[company_db['LV1'] == industry]['Ticker'])] prices = cp.loc[:, cp.columns.isin(company_db[company_db['LV1'] == industry]['Ticker'])] w = mc.div(mc.sum(1).values, axis=0) rets = prices.pct_change() df_mw_index[industry] = (w * rets).sum(1) df_ew_index[industry] = rets.mean(1) mw_index[country] = df_mw_index.fillna(0) ew_index[country] = df_ew_index.fillna(0) xls = pd.ExcelFile('Data/keywords_definitivas_mongo.xlsx') industry_filter = ['Pesca', 'Agricola', 'Financials-RP', 'Agriculture'] keywords_dict = {sheet: xls.parse(sheet) for sheet in xls.sheet_names if sheet not in industry_filter} xls.close() del xls new_key = "Real Estate Management & Development" old_key = "Real Estate Management & Develo" keywords_dict[new_key] = keywords_dict.pop(old_key) trends_frames = {} trends_frames_cl = read_trends_frames('CL') trends_frames_br = read_trends_frames('BR') for key_cl in trends_frames_cl.keys(): trends_frames_cl[key_cl] = trends_frames_cl[key_cl].drop(columns='Unnamed: 0') for key_br in trends_frames_br.keys(): trends_frames_br[key_br] = trends_frames_br[key_br].drop(columns='Unnamed: 0') for industry in keywords_dict.keys(): if not industry=='Restaurantes': countries_in_industry = keywords_dict[industry].columns trends_frames[industry] = {} for c in countries_in_industry: if c=='CL': if industry == 'Real Estate Management & Development': index = trends_frames_cl['Real Estate-CL'].index trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_cl['Real Estate-CL'].columns, index=index) else: index = trends_frames_cl[industry+'-CL'].index trends_frames_cl[industry+'-CL'] = trends_frames_cl[industry+'-CL'].loc[:, trends_frames_cl[industry+'-CL'].columns.notnull()] trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_cl[industry+'-CL'].columns, index=index) elif c=='BR': if industry == 'Real Estate Management & Development': index = trends_frames_br['Real Estate-BR'].index trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_br['Real Estate-BR'].columns, index=index) else: index = trends_frames_br[industry + '-BR'].index trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_br[industry+'-BR'].columns, index=index) if 'CL' in countries_in_industry: if industry == 'Real Estate Management & Development': for col_cl in trends_frames_cl['Real Estate-CL'].columns: if col_cl in keywords_dict[industry]['CL'].values: trends_frames[industry]['CL'][col_cl] = trends_frames_cl['Real Estate-CL'][col_cl].dropna() else: for col_cl in trends_frames_cl[industry+'-CL'].columns: if col_cl in keywords_dict[industry]['CL'].values: trends_frames[industry]['CL'][col_cl] = trends_frames_cl[industry+'-CL'][col_cl].dropna() if 'BR' in countries_in_industry: if industry == 'Real Estate Management & Development': for col_br in trends_frames_br['Real Estate-BR'].columns: if col_br in keywords_dict[industry]['BR'].values: trends_frames[industry]['BR'][col_br] = trends_frames_br['Real Estate-BR'][col_br].dropna() else: for col_br in trends_frames_br[industry+'-BR'].columns: if col_br in keywords_dict[industry]['BR'].values: trends_frames[industry]['BR'][col_br] = trends_frames_br[industry+'-BR'][col_br].dropna() deseason = True n_words = 5 for industry in keywords_dict.keys(): if not industry == 'Restaurantes': countries_in_industry = keywords_dict[industry].columns for c in countries_in_industry: trends_frames[industry][c] = trends_frames[industry][c].loc[:, trends_frames[industry][c].columns.notnull()] summary = pd.DataFrame() fig1 = make_subplots(rows=2, cols=1, subplot_titles=['Cambio Semanal', 'Cambio 1 Mes', 'Cambio 3 Meses', 'Cambio YTD'], horizontal_spacing=0.6, ) fig2 = make_subplots(rows=2, cols=1, subplot_titles=['Cambio Semanal', 'Cambio 1 Mes', 'Cambio 3 Meses', 'Cambio YTD'], horizontal_spacing=0.6) for industry, dict_ in trends_frames.items(): for country, df_ in dict_.items(): if deseason: df_ = pd.DataFrame({col: df_[col] - seasonal_decompose(df_[col], period=8).seasonal for col in df_.columns}) summary[f'{industry}-{country}'] = df_.mean(1) summary = (summary - summary.mean()) / summary.std() delta_w = summary.diff(1).iloc[-1].sort_values(ascending=True) delta_m = summary.diff(4).iloc[-1].sort_values(ascending=True) delta_3m = summary.diff(12).iloc[-1].sort_values(ascending=True) delta_ytd = summary.resample('Y').last().diff().iloc[-1].sort_values(ascending=True) fig1.add_trace(go.Bar(x=delta_w.array, y=delta_w.index, orientation='h', marker_color=colors2[2], showlegend=False), row=1, col=1) fig2.add_trace(go.Bar(x=delta_m.array, y=delta_m.index, orientation='h', marker_color=colors2[2], showlegend=False), row=1, col=1) fig1.add_trace(go.Bar(x=delta_3m.array, y=delta_3m.index, orientation='h', marker_color=colors2[2], showlegend=False), row=2, col=1) fig2.add_trace(go.Bar(x=delta_ytd.array, y=delta_ytd.index, orientation='h', marker_color=colors2[2], showlegend=False), row=2, col=1) fig1.update_layout(title_text='Cambios en las Búsquedas', margin_b=0, margin_t=50, margin_r=0, margin_l=0) fig2.update_layout(margin_b=0, margin_t=50, margin_r=0, margin_l=0) col1.plotly_chart(fig1, use_container_width=True) col2.plotly_chart(fig2, use_container_width=True) for industry in trends_frames: for i, (country, data) in enumerate(trends_frames[industry].items()): if countries_dict[country] in select_countries: fig_indices1 = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}], [{"secondary_y": False}]], subplot_titles=['GT (zscore) vs Spread Histórico', 'Variación YoY GT Histórico'], horizontal_spacing=0.) fig_indices2 = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}], [{"secondary_y": False}]], subplot_titles=['GT (zscore) vs Spread Último Año', 'Variación YoY GT Último año'], horizontal_spacing=0.3) aux_df = summary[f'{industry}-{country}'] aux_df.index.name = '' mm_year = aux_df.rolling(52).mean() mm_half = aux_df.rolling(26).mean() mm_quarter = aux_df.rolling(13).mean() mm_month = aux_df.rolling(4).mean() mm = pd.concat([mm_year, mm_half, mm_quarter, mm_month], axis=1) mm.columns = ['1Y', '6M', '3M', '1M'] if deseason: p = '3M' else: p = '1Y' fig_indices1.add_trace(go.Scatter(x=mm[p].index, y=mm[p].array, line=dict(color=colors2[0]), showlegend=True, name=f'{p} MM GT Index'), secondary_y=False, row=1, col=1) fig_indices1.update_layout(title_text=f'{industry} - {country}') fig_indices2.add_trace( go.Scatter(x=mm[p].iloc[-52:].index, y=mm[p].iloc[-52:].array, line=dict(color=colors2[0]), showlegend=False, name=f'{p} MM GT Index'), secondary_y=False, row=1, col=1) mm_4w = data.mean(1).rolling(4).mean() yoy = mm_4w.pct_change(52) aux2 = pd.concat([yoy], axis=1) aux2.columns = ['YoY'] aux2.index.name = '' fig_indices1.add_trace(go.Bar(x=aux2.dropna().index, y=aux2.dropna()['YoY'], marker_color=colors2[1], showlegend=False), row=2, col=1) fig_indices2.add_trace(go.Bar(x=aux2.dropna().iloc[-52:].index, y=aux2.dropna()['YoY'].iloc[-52:].array, marker_color=colors2[1], showlegend=False), row=2, col=1) if country == 'US' and industry == 'Pesca': country_ = 'Chile' else: country_ = countries_dict[country] spread_mw = (mw_index[country_][industry].rolling(52).apply(lambda x: (1 + x).prod()) - country_index[country_]['MW'].rolling(52).apply(lambda x: (1 + x).prod())) spread_ew = (ew_index[country_][industry].rolling(52).apply(lambda x: (1 + x).prod()) - country_index[country_]['EW'].rolling(52).apply(lambda x: (1 + x).prod())) spread = pd.DataFrame({'EW': spread_ew, 'MW': spread_mw}) fig_indices1.add_trace(go.Scatter(x=spread['MW'].dropna().index, y=spread['MW'].dropna().array, name='Spread MW', line=dict(color=colors2[3])), secondary_y=True, row=1, col=1) fig_indices2.add_trace( go.Scatter(x=spread['MW'].iloc[-260:].dropna().index, y=spread['MW'].iloc[-260:].dropna().array, name='Spread MW', line=dict(color=colors2[3])), secondary_y=True, row=1, col=1) fig_indices1.update_xaxes(showticklabels=False) fig_indices2.update_xaxes(showticklabels=False) fig_indices1.layout.update(xaxis_rangeslider_visible=False, margin_b=20, margin_r=20, margin_l=20, legend=dict(orientation="h", yanchor="top", y=0.6, xanchor="right", x=1)) fig_indices2.layout.update(xaxis_rangeslider_visible=False, margin_b=20, margin_r=20, margin_l=20, legend=dict(orientation="h", yanchor="top", y=0.6, xanchor="right", x=1)) fig_indices1.update_xaxes(showticklabels=True, row=2, col=1) fig_indices2.update_xaxes(showticklabels=True, row=2, col=1) fig_indices1.update_yaxes(tickformat=',.0%', row=2, col=1) fig_indices2.update_yaxes(tickformat=',.0%', row=2, col=1) if deseason: df1 = pd.DataFrame({col: data[col] - seasonal_decompose(data[col]).seasonal for col in data.columns}) else: df1 = data # Top word's table plot last_week = df1.iloc[-1].sort_values(ascending=False)[:n_words] / 100 all_time = df1.mean().sort_values(ascending=False)[:n_words] / 100 fig_W = make_subplots(subplot_titles=['Top Words en ' + f'{industry} - {country}']) table = pd.concat([pd.Series(last_week.index), pd.Series(all_time.index)], axis=1) table.columns = ['Top 1W', 'Top 5Y'] fig_W.add_trace(go.Table(header=dict(values=table.columns), cells=dict(values=[table['Top 1W'].values, table['Top 5Y'].values]))) fig_W.update_layout(margin_b=0, margin_t=50, margin_r=0, margin_l=0, height=200) fig_indices1.update_layout(margin_b=0, margin_t=50, margin_r=0, margin_l=0, height=600) fig_indices2.update_layout(margin_b=0, margin_t=50, margin_r=0, margin_l=0, height=600) col1.plotly_chart(fig_indices1, use_container_width=True) col2.plotly_chart(fig_indices2, use_container_width=True) fig_W.update_layout(margin_b=0, margin_t=30, margin_r=10, margin_l=0) st.plotly_chart(fig_W, use_container_width=True)