import io

import pandas as pd
import plotly.express as px
import streamlit as st


# Configure the page, using one title string for both the browser tab and the
# on-page heading.
app_title = "MoviePy Uber Output"
st.set_page_config(page_title=app_title, layout="wide")
st.title(app_title)

# Analysis video section
st.subheader("Video de Análisis")

# File name of the concatenated video (a relative path)
video_filename = 'final_analisis_video.mp4'

# Embed the video when the file can be opened; otherwise show a warning and
# let the rest of the page render.
try:
    with open(video_filename, mode='rb') as video_file:
        video_bytes = video_file.read()
        st.video(video_bytes)
except FileNotFoundError:
    st.warning(f"Archivo de video {video_filename} no encontrado.")

# Load the data
@st.cache_data
def load_data():
    """Read the Uber pickup CSV into a DataFrame.

    The first line of the file (its header row) is skipped and the column
    names listed below are applied instead. The function is decorated with
    ``st.cache_data``.

    Returns:
        pandas.DataFrame: the parsed file with columns
        ``Dispatching_base_num``, ``Pickup_date``, ``Affiliated_base_num``
        and ``locationID``.
    """
    csv_path = 'uber-raw-data-janjune-15.csv'
    column_names = [
        'Dispatching_base_num',
        'Pickup_date',
        'Affiliated_base_num',
        'locationID',
    ]
    # skiprows=1 discards the file's own header line so `names` takes its place.
    return pd.read_csv(csv_path, names=column_names, skiprows=1)

# Load the dataset. A missing CSV is reported in the page (the same way the
# missing-video case is handled) instead of surfacing as an uncaught traceback.
try:
    uber = load_data()
except FileNotFoundError as exc:
    st.error(f"Archivo de datos no encontrado: {exc}")
    st.stop()


# Data Preprocessing/Cleaning
st.subheader("Preprocesamiento y Limpieza de Datos")
st.write("Información del DataFrame:")
# DataFrame.info() prints its report and returns None, so writing its return
# value would only show "None". Capture the report in a text buffer and render
# it as preformatted text instead.
info_buffer = io.StringIO()
uber.info(buf=info_buffer)
st.text(info_buffer.getvalue())
st.write("Suma de valores nulos en cada columna:")
st.write(uber.isnull().sum())
st.write("Suma de filas duplicadas:", uber.duplicated().sum())
# In-place removal keeps `uber` the same object for the column assignments
# that follow.
uber.drop_duplicates(inplace=True)
st.write("Suma de filas duplicadas después de eliminar duplicados:", uber.duplicated().sum())

# Convert Pickup_date to datetime; values that cannot be parsed become NaT.
uber['Pickup_date'] = pd.to_datetime(uber['Pickup_date'], errors='coerce')
# Positional access avoids depending on row label 0, and the guard avoids an
# exception when the frame has no rows at all.
if not uber.empty:
    st.write("Primera fecha de recogida:", uber['Pickup_date'].iloc[0])

# Check for NaT values (invalid dates); the mask is computed once and reused.
invalid_dates = uber['Pickup_date'].isna()
if invalid_dates.any():
    st.warning("Advertencia: Algunas fechas de recogida no pudieron ser convertidas y se marcaron como NaT.")
    st.write("Filas con fechas inválidas:")
    st.write(uber[invalid_dates])

# Drop rows with NaT values in Pickup_date
uber.dropna(subset=['Pickup_date'], inplace=True)

# The analyses below call idxmax() on per-group counts, which raises on empty
# input, so halt here with a message when nothing is left to analyse.
if uber.empty:
    st.error("No hay datos válidos para analizar después de la limpieza.")
    st.stop()

# 1. Finding the month with maximum uber pickups
st.subheader("Mes con el máximo de recogidas de Uber")
uber['Month'] = uber['Pickup_date'].dt.month
# Rows per month number, ordered by month number rather than by count.
monthly_pickups = uber['Month'].value_counts().sort_index()
max_month = monthly_pickups.idxmax()
st.write(f"Mes con el máximo de recogidas de Uber: {max_month}")

# Plotly visualization for monthly pickups.
# x and y are passed as plain arrays with no data_frame argument, so Plotly
# Express names the two columns "x" and "y" -- the keys used in `labels`.
# When the Series is also passed as data_frame, the x column can be named
# after the frame's index instead, and the 'x' label is then ignored.
fig1 = px.bar(x=monthly_pickups.index, y=monthly_pickups.values,
              labels={'x': 'Mes', 'y': 'Número de Recogidas'},
              title='Recogidas Mensuales de Uber')
st.plotly_chart(fig1)

# 2. Which day has more demand for uber
st.subheader("Día con más demanda de Uber")
uber['Day'] = uber['Pickup_date'].dt.day_name()
# Rows per weekday name; left in value_counts() order (not sorted by weekday).
daily_pickups = uber['Day'].value_counts()
max_day = daily_pickups.idxmax()
st.write(f"Día con más demanda de Uber: {max_day}")

# Plotly visualization for daily pickups.
# x and y are passed as plain arrays with no data_frame argument, so Plotly
# Express names the two columns "x" and "y" -- the keys used in `labels`.
# When the Series is also passed as data_frame, the x column can be named
# after the frame's index instead, and the 'x' label is then ignored.
fig2 = px.bar(x=daily_pickups.index, y=daily_pickups.values,
              labels={'x': 'Día', 'y': 'Número de Recogidas'},
              title='Recogidas Diarias de Uber')
st.plotly_chart(fig2)

# 3. Finding Peak hours on all days
st.subheader("Hora pico para recogidas de Uber")
uber['Hour'] = uber['Pickup_date'].dt.hour
# Rows per hour of day, ordered by hour rather than by count.
hourly_pickups = uber['Hour'].value_counts().sort_index()
peak_hour = hourly_pickups.idxmax()
st.write(f"Hora pico para recogidas de Uber: {peak_hour}")

# Plotly visualization for hourly pickups.
# x and y are passed as plain arrays with no data_frame argument, so Plotly
# Express names the two columns "x" and "y" -- the keys used in `labels`.
# When the Series is also passed as data_frame, the x column can be named
# after the frame's index instead, and the 'x' label is then ignored.
fig3 = px.bar(x=hourly_pickups.index, y=hourly_pickups.values,
              labels={'x': 'Hora', 'y': 'Número de Recogidas'},
              title='Recogidas por Hora de Uber')
st.plotly_chart(fig3)

# 4. Finding out how many vehicles at each base number
st.subheader("Número de vehículos en cada número base")
# value_counts() tallies rows per dispatching base.
# NOTE(review): the on-screen labels say "vehículos"; confirm that one row
# corresponds to a vehicle rather than to a pickup.
base_counts = uber['Dispatching_base_num'].value_counts()
st.write("Número de vehículos en cada número base:")
st.write(base_counts)

# Plotly visualization for base number counts.
# x and y are passed as plain arrays with no data_frame argument, so Plotly
# Express names the two columns "x" and "y" -- the keys used in `labels`.
# When the Series is also passed as data_frame, the x column can be named
# after the frame's index instead, and the 'x' label is then ignored.
fig4 = px.bar(x=base_counts.index, y=base_counts.values,
              labels={'x': 'Número Base', 'y': 'Número de Vehículos'},
              title='Número de Vehículos en Cada Número Base')
st.plotly_chart(fig4)

# 5. Analyzing maximum uber pickups based on locations
st.subheader("Ubicación con el máximo de recogidas de Uber")
# Rows per locationID value.
location_counts = uber['locationID'].value_counts()
max_location = location_counts.idxmax()
st.write(f"Ubicación con el máximo de recogidas de Uber: {max_location}")

# Plotly visualization for location-based pickups.
# x and y are passed as plain arrays with no data_frame argument, so Plotly
# Express names the two columns "x" and "y" -- the keys used in `labels`.
# When the Series is also passed as data_frame, the x column can be named
# after the frame's index instead, and the 'x' label is then ignored.
fig5 = px.bar(x=location_counts.index, y=location_counts.values,
              labels={'x': 'ID de Ubicación', 'y': 'Número de Recogidas'},
              title='Recogidas de Uber por Ubicación')
st.plotly_chart(fig5)

# Summary section: repeat each headline result computed above, in order.
st.subheader("Inferencias")
summary_items = (
    f"1. Mes con el máximo de recogidas de Uber: {max_month}",
    f"2. Día con más demanda de Uber: {max_day}",
    f"3. Hora pico para recogidas de Uber: {peak_hour}",
    "4. Número de vehículos en cada número base:",
    base_counts,  # the per-base counts are written right after their caption
    f"5. Ubicación con el máximo de recogidas de Uber: {max_location}",
)
for summary_item in summary_items:
    st.write(summary_item)