Spaces:

bullm
/

PortalLVAM

Configuration error

File size: 25,533 Bytes

025632f

from modules import tables
from google_tools import trends as gtrends
import pandas as pd
import numpy as np
from datetime import timedelta, date
from statsmodels.tsa.seasonal import seasonal_decompose
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import streamlit as st
import io
import boto3
import openpyxl


key ='AKIARYMZ4J2YQDB66VX4'
secret_key = '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'
bucket='portallvam'
path ='Momentum.xlsx'


def save_s3(key, secret_key, bucket, df, path):
    with io.BytesIO() as output:
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            for industry in df.keys():
                df[industry].to_excel(writer, sheet_name=industry)
        data = output.getvalue()
    s3 = boto3.resource('s3', aws_access_key_id=key, aws_secret_access_key=secret_key)
    s3.Bucket(bucket).put_object(Key=path, Body=data)


def read_excel_s3(key, secret_key, bucket, path):
    s3_client = boto3.client('s3', aws_access_key_id=key, aws_secret_access_key=secret_key)
    response = s3_client.get_object(Bucket=bucket, Key=path)
    data = response["Body"].read()
    df = pd.read_excel(io.BytesIO(data), sheet_name=None, index_col='Unnamed: 0.1')
    return df


def generar_excel(ruta_guardado, Pestanas, Data):
    wb = openpyxl.Workbook()
    writer = pd.ExcelWriter(ruta_guardado)
    for pestana in Pestanas:
        wb.create_sheet(pestana)
    std = wb.get_sheet_by_name('Sheet')
    wb.remove_sheet(std)
    wb.save(ruta_guardado)
    for i, pestana in enumerate(Pestanas):
        if pestana=='Real Estate Management & Development-CL':
            pestana = 'Real Estate-CL'
            Data['Real Estate Management & Development-CL'].to_excel(writer, sheet_name=pestana)
        elif pestana=='Real Estate Management & Development-BR':
            pestana = 'Real Estate-BR'
            Data['Real Estate Management & Development-BR'].to_excel(writer, sheet_name=pestana)
        else:
            Data[pestana].to_excel(writer, sheet_name=Pestanas[i])
    writer.save()


def colores_corporativos(colors=None):

    color_dict = {'red': (204, 0, 51),
                   'light_blue': (110, 162, 201),
                   'light_gray': (135, 146, 158),
                   'grey': (105, 105, 105),
                   'yellow': (195, 195, 9),
                   'dark_purple': (119, 28, 95),
                   'blue': (42, 83, 113),
                   'purple': (159, 37, 127),
                   'light_yellow': (252, 252, 196),
                   'light_green': (122, 178, 153),
                   'gray': (66, 74, 82)}

    for key in color_dict:
        color_dict[key] = tuple(v/255 for v in color_dict[key])

    if colors is None:
        return color_dict
    else:
        aux = {col: color_dict[col] for col in colors}
        return aux


corp_colors = list(colores_corporativos().values())
colors2 = []
for i in range(len(corp_colors)):
    colors2.append("rgb" + str(corp_colors[i]))


company_db = pd.read_excel('Data/Company_Base_Definitivo.xlsx', sheet_name='Compilado')
id_to_ticker = {str(row['ID_Quant']): str(row['Ticker Bloomberg']).split()[0] for i, row in company_db.iterrows()}

countries_dict = {'BR': 'Brazil', 'CL': 'Chile', 'US': 'Brazil',
                  'US-Disease': 'Brazil'}


@st.cache(suppress_st_warning=True)
def data_request(countries, start, currency='USD'):
    close_price = {'Brazil': [],
                   'Chile': []}
    market_cap = {'Brazil': [],
                  'Chile': []}
    for c in countries:
        close_price[c] = tables.EquityMaster(field='IQ_CLOSEPRICE_ADJ', currency=currency, country=c).query(
            rename=['asset'], start=start, expand=True)
        market_cap[c] = tables.EquityMaster(field='IQ_MARKETCAP', currency=currency, country=c).query(
            start=start, rename=['asset'], expand=True)

        close_price[c] = close_price[c].loc[:, close_price[c].columns.isin(id_to_ticker.keys())]
        close_price[c].columns = [id_to_ticker[col] for col in close_price[c].columns]

        market_cap[c] = market_cap[c].loc[:, market_cap[c].columns.isin(id_to_ticker.keys())]
        market_cap[c].columns = [id_to_ticker[col] for col in market_cap[c].columns]
    return [close_price, market_cap]


@st.cache(suppress_st_warning=True)
def trends_request(keywords, today):
    trends_frames_dict = {}
    for sector, values in keywords.items():
        if not (sector in ['Restaurantes']):
            trends_frames_dict[sector] = {}
            print('Buscando para ' + sector)
            for country_name in values.columns:
                words = values[country_name].dropna()
                if '-' in country_name:
                    fixed_country_name = country_name.split('-')[0].strip()
                else:
                    fixed_country_name = country_name
                words_index = pd.DataFrame()
                for word in words:
                    new_data = gtrends.keyword_trend(word, fixed_country_name, end_date=today)
                    if new_data is not None:
                        words_index = pd.concat([words_index,
                                                 new_data],
                                                axis=1)
                    else:
                        print('No se encuentra data para ' + word)
                trends_frames_dict[sector][country_name] = words_index
                trends_frames_dict[sector][country_name].index.name = None
    return trends_frames_dict


def trends_frames_excel(dicc):
    sheets_cl = []
    sheets_br = []
    for key_1 in dicc.keys():
        for key_2 in dicc[key_1].keys():
            if key_2=='CL':
                sheets_cl .append(key_1 + '-' + key_2)
            else:
                sheets_br.append(key_1 + '-' + key_2)
    trends_frames_dict_cl = {}
    trends_frames_dict_br = {}
    for key_1 in dicc.keys():
        for key_2 in dicc[key_1].keys():
            if key_2=='CL':
                trends_frames_dict_cl[key_1 + '-'+ key_2] = dicc[key_1][key_2]
            elif key_2=='BR':
                trends_frames_dict_br[key_1 + '-' + key_2] = dicc[key_1][key_2]
            elif key_2=='US' or key_2=='US-Disease':
                trends_frames_dict_br[key_1 + '-' + key_2] = dicc[key_1][key_2]

    generar_excel('Data/GT_CL.xlsx', sheets_cl, trends_frames_dict_cl)
    df_cl = pd.read_excel('Data/GT_CL.xlsx', sheet_name=None)
    st.write(df_cl)
    save_s3(key=key, secret_key=secret_key, bucket=bucket, df=df_cl, path='GT_CL.xlsx')

    generar_excel('Data/GT_BR.xlsx', sheets_br, trends_frames_dict_br)
    df_br = pd.read_excel('Data/GT_BR.xlsx', sheet_name=None)
    save_s3(key=key, secret_key=secret_key, bucket=bucket, df=df_br, path='GT_BR.xlsx')


def read_trends_frames(country):
    if country=='CL':
        return read_excel_s3(key=key, secret_key=secret_key, bucket=bucket, path='GT_CL.xlsx')
    elif country=='BR':
        return read_excel_s3(key=key, secret_key=secret_key, bucket=bucket, path='GT_BR.xlsx')


def report():
    form = st.form('Report')
    start_date = str(date.today() - timedelta(5 * 365))
    select_countries = form.multiselect('¿Qué país(es) desea visualizar?', ['Todos', 'Chile', 'Brazil'])
    if 'Todos' in select_countries:
        select_countries = ['Chile', 'Brazil']
    update_data = form.form_submit_button("Actualizar Datos")
    accept = form.form_submit_button('Visualizar')
    col1, col2 = st.columns(2)

    if update_data:
        xls = pd.ExcelFile('Data/keywords_definitivas_mongo.xlsx')
        industry_filter = ['Pesca', 'Agricola', 'Financials-RP']
        keywords_dict = {sheet: xls.parse(sheet) for sheet in xls.sheet_names
                         if sheet not in industry_filter}
        xls.close()
        del xls

        # Arreglamos una llave porque una hoja de excel alcanza el máximo de caracteres posible para un nombre.
        new_key = "Real Estate Management & Development"
        old_key = "Real Estate Management & Develo"
        keywords_dict[new_key] = keywords_dict.pop(old_key)

        trends_dict = trends_request(keywords_dict, date.today())
        trends_frames_excel(trends_dict)
        ud = pd.read_excel('Data/update_data.xlsx')
        ud = ud[ud['View'] != 'Google Trends']
        today = date.today().strftime('%d-%m-%Y')
        ud = ud.append({"View": 'Google Trends',
                    "Last_Update": today}, ignore_index=True)
        ud.to_excel('Data/update_data.xlsx', index=False)

    if accept:
        close_price_dict, market_cap_dict = data_request(select_countries, start_date)

        ew_index = {}
        mw_index = {}
        country_index = {}

        if select_countries == ['Brazil']:
            dates = {'Brazil': sorted(list(set(market_cap_dict['Brazil'].index)
                                           .union(set(close_price_dict['Brazil'].index))))}
        elif select_countries == ['Chile']:
            dates = {'Chile': sorted(list(set(market_cap_dict['Chile'].index)
                                          .union(set(close_price_dict['Chile'].index))))}
        else:
            dates = {'Brazil': sorted(list(set(market_cap_dict['Brazil'].index)
                                           .union(set(close_price_dict['Brazil'].index)))),
                     'Chile': sorted(list(set(market_cap_dict['Chile'].index)
                                          .union(set(close_price_dict['Chile'].index))))}

        for country in select_countries:
            mkt = market_cap_dict[country]
            cp = close_price_dict[country]
            w = mkt.div(mkt.sum(1).values, axis=0)
            rets = cp.pct_change()
            country_index[country] = pd.DataFrame({'MW': (w * rets).sum(1),
                                                   'EW': rets.mean(1)}).fillna(0)
            industries_1 = np.unique(company_db[['LV1']].values)
            industries_2 = np.unique(company_db[['LV2']].values)
            industries = np.unique(np.concatenate([industries_1, industries_2]))
            df_mw_index = pd.DataFrame(columns=industries, index=dates[country])
            df_ew_index = pd.DataFrame(columns=industries, index=dates[country])
            for industry in industries:
                industry = str(industry)
                mc = mkt.loc[:, mkt.columns.isin(company_db[company_db['LV1'] == industry]['Ticker'])]
                prices = cp.loc[:, cp.columns.isin(company_db[company_db['LV1'] == industry]['Ticker'])]

                w = mc.div(mc.sum(1).values, axis=0)
                rets = prices.pct_change()
                df_mw_index[industry] = (w * rets).sum(1)
                df_ew_index[industry] = rets.mean(1)
            mw_index[country] = df_mw_index.fillna(0)
            ew_index[country] = df_ew_index.fillna(0)

        xls = pd.ExcelFile('Data/keywords_definitivas_mongo.xlsx')
        industry_filter = ['Pesca', 'Agricola', 'Financials-RP', 'Agriculture']
        keywords_dict = {sheet: xls.parse(sheet) for sheet in xls.sheet_names
                         if sheet not in industry_filter}
        xls.close()
        del xls

        new_key = "Real Estate Management & Development"
        old_key = "Real Estate Management & Develo"
        keywords_dict[new_key] = keywords_dict.pop(old_key)

        trends_frames = {}
        trends_frames_cl = read_trends_frames('CL')
        trends_frames_br = read_trends_frames('BR')

        for key_cl in trends_frames_cl.keys():
            trends_frames_cl[key_cl] = trends_frames_cl[key_cl].drop(columns='Unnamed: 0')

        for key_br in trends_frames_br.keys():
            trends_frames_br[key_br] = trends_frames_br[key_br].drop(columns='Unnamed: 0')

        for industry in keywords_dict.keys():
            if not industry=='Restaurantes':
                countries_in_industry = keywords_dict[industry].columns
                trends_frames[industry] = {}
                for c in countries_in_industry:
                    if c=='CL':
                        if industry == 'Real Estate Management & Development':
                            index = trends_frames_cl['Real Estate-CL'].index
                            trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_cl['Real Estate-CL'].columns,
                                                                      index=index)
                        else:
                            index = trends_frames_cl[industry+'-CL'].index
                            trends_frames_cl[industry+'-CL'] = trends_frames_cl[industry+'-CL'].loc[:, trends_frames_cl[industry+'-CL'].columns.notnull()]
                            trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_cl[industry+'-CL'].columns,
                                                                      index=index)
                    elif c=='BR':
                        if industry == 'Real Estate Management & Development':
                            index = trends_frames_br['Real Estate-BR'].index
                            trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_br['Real Estate-BR'].columns,
                                                                      index=index)
                        else:
                            index = trends_frames_br[industry + '-BR'].index
                            trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_br[industry+'-BR'].columns,
                                                                      index=index)
                if 'CL' in countries_in_industry:
                    if industry == 'Real Estate Management & Development':
                        for col_cl in trends_frames_cl['Real Estate-CL'].columns:
                            if col_cl in keywords_dict[industry]['CL'].values:
                                trends_frames[industry]['CL'][col_cl] = trends_frames_cl['Real Estate-CL'][col_cl].dropna()
                    else:
                        for col_cl in trends_frames_cl[industry+'-CL'].columns:
                            if col_cl in keywords_dict[industry]['CL'].values:
                                trends_frames[industry]['CL'][col_cl] = trends_frames_cl[industry+'-CL'][col_cl].dropna()
                if 'BR' in countries_in_industry:
                    if industry == 'Real Estate Management & Development':
                        for col_br in trends_frames_br['Real Estate-BR'].columns:
                            if col_br in keywords_dict[industry]['BR'].values:
                                trends_frames[industry]['BR'][col_br] = trends_frames_br['Real Estate-BR'][col_br].dropna()
                    else:
                        for col_br in trends_frames_br[industry+'-BR'].columns:
                            if col_br in keywords_dict[industry]['BR'].values:
                                trends_frames[industry]['BR'][col_br] = trends_frames_br[industry+'-BR'][col_br].dropna()
        deseason = True
        n_words = 5
        for industry in keywords_dict.keys():
            if not industry == 'Restaurantes':
                countries_in_industry = keywords_dict[industry].columns
                for c in countries_in_industry:
                    trends_frames[industry][c] = trends_frames[industry][c].loc[:, trends_frames[industry][c].columns.notnull()]

        summary = pd.DataFrame()
        fig1 = make_subplots(rows=2, cols=1,
                             subplot_titles=['Cambio Semanal', 'Cambio 1 Mes', 'Cambio 3 Meses', 'Cambio YTD'],
                             horizontal_spacing=0.6, )
        fig2 = make_subplots(rows=2, cols=1,
                             subplot_titles=['Cambio Semanal', 'Cambio 1 Mes', 'Cambio 3 Meses', 'Cambio YTD'],
                             horizontal_spacing=0.6)
        for industry, dict_ in trends_frames.items():
            for country, df_ in dict_.items():
                if deseason:
                    df_ = pd.DataFrame({col: df_[col] -
                                             seasonal_decompose(df_[col], period=8).seasonal
                                        for col in df_.columns})
                summary[f'{industry}-{country}'] = df_.mean(1)
        summary = (summary - summary.mean()) / summary.std()

        delta_w = summary.diff(1).iloc[-1].sort_values(ascending=True)
        delta_m = summary.diff(4).iloc[-1].sort_values(ascending=True)
        delta_3m = summary.diff(12).iloc[-1].sort_values(ascending=True)
        delta_ytd = summary.resample('Y').last().diff().iloc[-1].sort_values(ascending=True)

        fig1.add_trace(go.Bar(x=delta_w.array, y=delta_w.index, orientation='h', marker_color=colors2[2],
                              showlegend=False), row=1, col=1)

        fig2.add_trace(go.Bar(x=delta_m.array, y=delta_m.index, orientation='h', marker_color=colors2[2],
                              showlegend=False), row=1, col=1)

        fig1.add_trace(go.Bar(x=delta_3m.array, y=delta_3m.index, orientation='h', marker_color=colors2[2],
                              showlegend=False), row=2, col=1)

        fig2.add_trace(go.Bar(x=delta_ytd.array, y=delta_ytd.index, orientation='h', marker_color=colors2[2],
                              showlegend=False), row=2, col=1)

        fig1.update_layout(title_text='Cambios en las Búsquedas', margin_b=0, margin_t=50, margin_r=0, margin_l=0)
        fig2.update_layout(margin_b=0, margin_t=50, margin_r=0, margin_l=0)
        col1.plotly_chart(fig1, use_container_width=True)
        col2.plotly_chart(fig2, use_container_width=True)

        for industry in trends_frames:
            for i, (country, data) in enumerate(trends_frames[industry].items()):
                if countries_dict[country] in select_countries:

                    fig_indices1 = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}],
                                                                        [{"secondary_y": False}]],
                                                 subplot_titles=['GT (zscore) vs Spread Histórico',
                                                                 'Variación YoY GT Histórico'],
                                                 horizontal_spacing=0.)
                    fig_indices2 = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}],
                                                                        [{"secondary_y": False}]],
                                                 subplot_titles=['GT (zscore) vs Spread Último Año',
                                                                 'Variación YoY GT Último año'],
                                                 horizontal_spacing=0.3)

                    aux_df = summary[f'{industry}-{country}']
                    aux_df.index.name = ''
                    mm_year = aux_df.rolling(52).mean()
                    mm_half = aux_df.rolling(26).mean()
                    mm_quarter = aux_df.rolling(13).mean()
                    mm_month = aux_df.rolling(4).mean()
                    mm = pd.concat([mm_year, mm_half, mm_quarter, mm_month], axis=1)
                    mm.columns = ['1Y', '6M', '3M', '1M']
                    if deseason:
                        p = '3M'
                    else:
                        p = '1Y'

                    fig_indices1.add_trace(go.Scatter(x=mm[p].index, y=mm[p].array, line=dict(color=colors2[0]),
                                                      showlegend=True, name=f'{p} MM GT Index'),
                                           secondary_y=False, row=1, col=1)

                    fig_indices1.update_layout(title_text=f'{industry} - {country}')
                    fig_indices2.add_trace(
                        go.Scatter(x=mm[p].iloc[-52:].index, y=mm[p].iloc[-52:].array, line=dict(color=colors2[0]),
                                   showlegend=False, name=f'{p} MM GT Index'),
                        secondary_y=False, row=1, col=1)

                    mm_4w = data.mean(1).rolling(4).mean()
                    yoy = mm_4w.pct_change(52)
                    aux2 = pd.concat([yoy], axis=1)
                    aux2.columns = ['YoY']
                    aux2.index.name = ''

                    fig_indices1.add_trace(go.Bar(x=aux2.dropna().index, y=aux2.dropna()['YoY'],
                                                  marker_color=colors2[1], showlegend=False), row=2, col=1)
                    fig_indices2.add_trace(go.Bar(x=aux2.dropna().iloc[-52:].index, y=aux2.dropna()['YoY'].iloc[-52:].array,
                                                  marker_color=colors2[1], showlegend=False), row=2, col=1)

                    if country == 'US' and industry == 'Pesca':
                        country_ = 'Chile'
                    else:
                        country_ = countries_dict[country]

                    spread_mw = (mw_index[country_][industry].rolling(52).apply(lambda x: (1 + x).prod()) -
                                 country_index[country_]['MW'].rolling(52).apply(lambda x: (1 + x).prod()))
                    spread_ew = (ew_index[country_][industry].rolling(52).apply(lambda x: (1 + x).prod()) -
                                 country_index[country_]['EW'].rolling(52).apply(lambda x: (1 + x).prod()))

                    spread = pd.DataFrame({'EW': spread_ew, 'MW': spread_mw})

                    fig_indices1.add_trace(go.Scatter(x=spread['MW'].dropna().index, y=spread['MW'].dropna().array,
                                                      name='Spread MW', line=dict(color=colors2[3])),
                                           secondary_y=True, row=1, col=1)

                    fig_indices2.add_trace(
                        go.Scatter(x=spread['MW'].iloc[-260:].dropna().index, y=spread['MW'].iloc[-260:].dropna().array,
                                   name='Spread MW', line=dict(color=colors2[3])),
                        secondary_y=True, row=1, col=1)

                    fig_indices1.update_xaxes(showticklabels=False)
                    fig_indices2.update_xaxes(showticklabels=False)
                    fig_indices1.layout.update(xaxis_rangeslider_visible=False, margin_b=20,
                                               margin_r=20, margin_l=20,
                                               legend=dict(orientation="h",
                                                           yanchor="top",
                                                           y=0.6,
                                                           xanchor="right",
                                                           x=1))
                    fig_indices2.layout.update(xaxis_rangeslider_visible=False,
                                               margin_b=20,
                                               margin_r=20, margin_l=20,
                                               legend=dict(orientation="h",
                                                           yanchor="top",
                                                           y=0.6,
                                                           xanchor="right",
                                                           x=1))
                    fig_indices1.update_xaxes(showticklabels=True, row=2,
                                              col=1)
                    fig_indices2.update_xaxes(showticklabels=True, row=2,
                                              col=1)

                    fig_indices1.update_yaxes(tickformat=',.0%', row=2, col=1)
                    fig_indices2.update_yaxes(tickformat=',.0%', row=2, col=1)

                    if deseason:
                        df1 = pd.DataFrame({col: data[col] -
                                            seasonal_decompose(data[col]).seasonal
                                            for col in data.columns})
                    else:
                        df1 = data

                    # Top word's table plot
                    last_week = df1.iloc[-1].sort_values(ascending=False)[:n_words] / 100
                    all_time = df1.mean().sort_values(ascending=False)[:n_words] / 100

                    fig_W = make_subplots(subplot_titles=['Top Words en ' + f'{industry} - {country}'])

                    table = pd.concat([pd.Series(last_week.index),
                                       pd.Series(all_time.index)], axis=1)
                    table.columns = ['Top 1W', 'Top 5Y']
                    fig_W.add_trace(go.Table(header=dict(values=table.columns),
                                             cells=dict(values=[table['Top 1W'].values, table['Top 5Y'].values])))
                    fig_W.update_layout(margin_b=0, margin_t=50,
                                               margin_r=0, margin_l=0,
                                               height=200)
                    fig_indices1.update_layout(margin_b=0, margin_t=50,
                                               margin_r=0, margin_l=0,
                                               height=600)
                    fig_indices2.update_layout(margin_b=0, margin_t=50,
                                               margin_r=0, margin_l=0,
                                               height=600)
                    col1.plotly_chart(fig_indices1, use_container_width=True)
                    col2.plotly_chart(fig_indices2, use_container_width=True)

                    fig_W.update_layout(margin_b=0, margin_t=30, margin_r=10, margin_l=0)
                    st.plotly_chart(fig_W, use_container_width=True)