Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import plotly.express as px
|
5 |
+
import plotly.graph_objects as go
|
6 |
+
from PIL import Image
|
7 |
+
from plotly.subplots import make_subplots
|
8 |
+
|
9 |
+
### Config
# Excel workbook that the dashboard reads its data from.
DATA_URL = "https://jedha-deployment.s3.amazonaws.com/get_around_delay_analysis.xlsx"

# Page-wide settings, issued before any other Streamlit element is drawn.
st.set_page_config(page_title="GetAround Analysis", page_icon="🚗", layout="wide")

st.title("GetAround Delay Analysis Web Dashboard 🚗")

# Short welcome banner displayed right under the title.
WELCOME_TEXT = """
Welcome to the Streamlit Dashboard of Getaround app! 👋

"""
st.markdown(WELCOME_TEXT)
|
24 |
+
|
25 |
+
@st.cache_data
def load_data() -> pd.DataFrame:
    """Return the ``rentals_data`` sheet of the workbook located at ``DATA_URL``.

    The function is decorated with ``st.cache_data`` so the workbook is not
    fetched again on every script rerun.
    """
    return pd.read_excel(DATA_URL, sheet_name='rentals_data')
|
29 |
+
|
30 |
+
@st.cache_data
def _load_metadata() -> pd.DataFrame:
    """Return the ``Documentation`` sheet of the workbook located at ``DATA_URL``.

    Cached so that ticking the checkbox does not trigger a new download of the
    workbook on every script rerun (Streamlit reruns the whole script on each
    widget interaction).
    """
    return pd.read_excel(DATA_URL, sheet_name='Documentation')


st.header("Load and showcase data", divider="red")

# Temporary placeholder shown while the rentals sheet is being loaded.
data_load_state = st.text('Loading data ...')
data = load_data()
data_load_state.text("")

col1, col2 = st.columns(2)
with col1:
    st.write("Here's the 50 first rows of the dataset")
    st.write(data.head(50))

with col2:
    # The column documentation is only loaded and displayed on demand.
    if st.checkbox('Show metadata'):
        st.subheader("Meaning of each column")
        metadata = _load_metadata()
        # NOTE(review): presumably meant to avoid truncated descriptions;
        # confirm this pandas option has any effect on st.write rendering.
        pd.set_option('display.max_colwidth', None)
        st.write(metadata)
|
47 |
+
|
48 |
+
# Graph showing the late checkouts proportions
st.header("How often are drivers late for checkout?")

# Only rentals with a recorded checkout delay can be classified: a missing
# value compares as False and would otherwise be counted as "in time".
known_delays = data["delay_at_checkout_in_minutes"].dropna()

# A delay of exactly 0 minutes is an on-time return, so "late" is strictly > 0.
late_share = (known_delays > 0).mean()

# Labels, values and colours are listed in the same fixed order so that
# "Late" always gets the darker red, whichever slice is the largest.
fig = go.Figure(
    data=[
        go.Pie(
            labels=['Late', 'In advance or in time'],
            values=[late_share, 1 - late_share],
            textinfo='percent',
            hole=.5,
        )
    ]
)
fig.update_traces(marker=dict(colors=['#D13838', '#FF9F9F']))
st.plotly_chart(fig)
|
54 |
+
|
55 |
+
|
56 |
+
st.header("Time Intervals and Delays")
left_panel, right_panel = st.columns(2)

with left_panel:
    # Distribution of the time delta with the previous rental.
    st.markdown("**Time Intervals between anticipated check-outs and next-check-in**")
    delta_hist = px.histogram(
        data,
        x="time_delta_with_previous_rental_in_minutes",
        color_discrete_sequence=["indianred"],
    )
    delta_hist.update_layout(bargap=0.01)
    st.plotly_chart(delta_hist, height = 400, use_container_width=True)

with right_panel:
    st.markdown("**Delays in minutes**")

    # Keep only delays strictly within three standard deviations of the mean.
    delays = data['delay_at_checkout_in_minutes']
    center = delays.mean()
    spread = 3 * delays.std()
    within_bounds = (delays > center - spread) & (delays < center + spread)
    trimmed = data[within_bounds]

    delay_hist = px.histogram(
        trimmed,
        x="delay_at_checkout_in_minutes",
        color_discrete_sequence=["indianred"],
    )
    delay_hist.update_layout(bargap=0.01)
    st.plotly_chart(delay_hist, height = 400 , use_container_width=True)
|
73 |
+
|
74 |
+
st.subheader("Some data insights")

# Percentage breakdowns feeding the three donut charts.
rental_count = len(data)
state_perc = data["state"].value_counts() / rental_count * 100
checkin_perc = data["checkin_type"].value_counts() / rental_count * 100
canceled_rentals = data[data['state'] == 'canceled']
canceled = (canceled_rentals['checkin_type'].value_counts() / len(canceled_rentals)) * 100

# One row of three "domain" cells, one per donut.
pie = make_subplots(
    rows=1,
    cols=3,
    specs=[[{"type": "domain"} for _ in range(3)]],
    shared_yaxes=True,
    subplot_titles=["State of rentals", "Checkin type","Proportion of cancellation by checkin type"],
)

# (shares, colours) for each donut, in left-to-right column order.
donuts = [
    (state_perc, ["#E73636","#FF9F9F"]),
    (checkin_perc, ["#202EBD", "#13E7E3"]),
    (canceled, ["#202EBD", "#13E7E3"]),
]
for column, (shares, palette) in enumerate(donuts, start=1):
    pie.add_trace(
        go.Pie(values=shares, labels=shares.index, marker_colors=palette),
        row=1,
        col=column,
    )

pie.update_traces(hole=0.4, textinfo="label+percent")
pie.update_layout(width=1200, showlegend=True)

st.plotly_chart(pie)
|
124 |
+
|
125 |
+
st.subheader("Quick analysis")
# The first bullet names the majority checkin type; it must not be the same
# type that the third bullet describes as representing only 20% of rentals.
st.markdown(""" * 80% of rentals are made via mobile checkin type.
* Around 15% of the overall rentals end up with cancellation.
* However, although connect checkin type only represents 20% of the rentals, we can underline that 25% of the cancellations come from connect checkin type.
* That highlights a bigger impact from this kind on rental flow over the cancellations.""")

# Empty element used as vertical spacing before the next section.
st.write("")
|
132 |
+
|
133 |
+
def _count_below(values, thresholds):
    """Count, for each threshold, the non-missing ``values`` strictly below it.

    Args:
        values: pandas Series of numeric values (missing entries are ignored).
        thresholds: sequence of numeric thresholds.

    Returns:
        A list of Python ints, one count per threshold, in threshold order.
    """
    ordered = np.sort(values.dropna().to_numpy())
    # In a sorted array, the left insertion point of a threshold equals the
    # number of elements strictly smaller than it.
    return np.searchsorted(ordered, thresholds, side="left").tolist()


# Difference between delay at checkout and the delta with previous rental
data['minutes_passed_checkin_time'] = data['delay_at_checkout_in_minutes'] - data['time_delta_with_previous_rental_in_minutes']

# Rentals that have a recorded time delta with a previous rental.
impacted_df = data[~data["time_delta_with_previous_rental_in_minutes"].isna()]

st.header("How many impacted and solved rentals cases depending on threshold and scope ?")

# Thresholds evaluated: every minute from 0 to 720 (12 hours) inclusive.
x_values = list(range(721))

# Impacted cases: rentals whose time delta is below the threshold.
time_delta = impacted_df['time_delta_with_previous_rental_in_minutes']
impacted_checkin = impacted_df['checkin_type']
impacted_list_total = _count_below(time_delta, x_values)
impacted_list_connect = _count_below(time_delta[impacted_checkin == 'connect'], x_values)
impacted_list_mobile = _count_below(time_delta[impacted_checkin == 'mobile'], x_values)

# Solved cases: rentals whose checkout delay exceeded the time delta and whose
# delay is below the threshold. The mask is built from impacted_df itself so
# that it shares the index of the frame it filters.
solved = impacted_df[impacted_df['minutes_passed_checkin_time'] > 0]
checkout_delay = solved['delay_at_checkout_in_minutes']
solved_checkin = solved['checkin_type']
solved_list_total = _count_below(checkout_delay, x_values)
solved_list_connect = _count_below(checkout_delay[solved_checkin == 'connect'], x_values)
solved_list_mobile = _count_below(checkout_delay[solved_checkin == 'mobile'], x_values)
|
172 |
+
|
173 |
+
def _threshold_chart(title, y_title, totals, connect, mobile):
    """Build a line chart with one trace per scope (all / connect / mobile).

    Each series is plotted against ``x_values`` (the thresholds in minutes).
    """
    traces = [
        go.Scatter(x=x_values, y=totals, name='All cars'),
        go.Scatter(x=x_values, y=connect, name='Connect cars'),
        go.Scatter(x=x_values, y=mobile, name='Mobile cars'),
    ]
    layout = go.Layout(
        title=title,
        xaxis=dict(title='Threshold in minutes'),
        yaxis=dict(title=y_title),
        xaxis_tickvals=list(range(0, 721, 60)),  # 60 minutes step from 0 to 12h
        legend=dict(orientation='h', yanchor='bottom', xanchor='right',y=1.02, x=1)
    )
    return go.Figure(data=traces, layout=layout)


col1, col2 = st.columns(2)

with col1:
    # Left panel: impacted cases per threshold.
    fig = _threshold_chart(
        'Number of impacted cases by threshold',
        'Number of impacted cases',
        impacted_list_total,
        impacted_list_connect,
        impacted_list_mobile,
    )
    st.plotly_chart(fig, width = 800, height = 600, use_container_width=True)

with col2:
    # Right panel: solved cases per threshold.
    fig = _threshold_chart(
        'Number of solved cases by threshold',
        'Number of cases solved',
        solved_list_total,
        solved_list_connect,
        solved_list_mobile,
    )
    st.plotly_chart(fig, width = 800, height = 600, use_container_width=True)
|
213 |
+
|
214 |
+
st.subheader("Graph analysis")
# Each bullet starts on its own line; a "*" in the middle of a line is
# rendered as a literal asterisk instead of opening a new list item.
st.markdown("""* The curve of solved cases tends to noticeably flatten out at around **120 minutes**, or even up to 180 minutes.
* We might be tempted to implement a much higher threshold in order to solve as many problem cases as possible.
* But we're faced with a twofold problem : the higher the threshold, the greater the impact on the number of cars available and obviously on our revenue.
* So we need to find the right balance between the number of problem cases solved and the proportion of revenue impacted.
* With this in mind, :red[**120 minutes**] threshold seems to be a good compromise for our business.""")
|
219 |
+
|
220 |
+
st.write("")
st.header("Dynamic playground of threshold and scope effects")
st.markdown("You can here adjust the threshold and scope you desire to see the effects on data")

## Threshold and scope form
# NOTE(review): the original indentation was lost; the result section is
# assumed to be rendered inside the form container — confirm.
with st.form("threshold_testing"):
    threshold = st.slider("Choose the threshold in minutes", 0,720,0)
    checkin_type = st.radio("Choose the desired checkin type", ["All", "Connect", "Mobile"])
    submit = st.form_submit_button("Let's check it out")

    if submit:
        st.markdown(f"With a threshold of **{threshold}** and for **{checkin_type}** scope")

        # Per-scope (impacted, solved) count lists, indexed by threshold in
        # minutes; any scope other than "All" / "Connect" uses the mobile lists.
        counts_by_scope = {
            "All": (impacted_list_total, solved_list_total),
            "Connect": (impacted_list_connect, solved_list_connect),
        }
        impacted_counts, solved_counts = counts_by_scope.get(
            checkin_type, (impacted_list_mobile, solved_list_mobile)
        )

        st.metric("The number of cases impacted is :", impacted_counts[threshold])
        st.metric("The number of cases solved is :", solved_counts[threshold])
|
241 |
+
|
242 |
+
|