khaldii committed on
Commit
a50a85d
·
verified ·
1 Parent(s): 379651a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +242 -0
app.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import plotly.express as px
5
+ import plotly.graph_objects as go
6
+ from PIL import Image
7
+ from plotly.subplots import make_subplots
8
+
9
# --- Page configuration ------------------------------------------------------
# Issued before any other Streamlit element is rendered.
st.set_page_config(page_title="GetAround Analysis", page_icon="🚗", layout="wide")

# Excel workbook read by the dashboard.
DATA_URL = "https://jedha-deployment.s3.amazonaws.com/get_around_delay_analysis.xlsx"

# --- Page heading ------------------------------------------------------------
st.title("GetAround Delay Analysis Web Dashboard 🚗")

st.markdown("""
Welcome to the Streamlit Dashboard of Getaround app! 👋

""")
24
+
25
@st.cache_data
def load_data(sheet_name="rentals_data"):
    """Read one sheet of the workbook at ``DATA_URL`` into a DataFrame.

    The function is wrapped in ``st.cache_data``, so a given sheet is only
    fetched again when it is requested with a different ``sheet_name``.

    Args:
        sheet_name: Name of the worksheet to read. Defaults to the rentals
            sheet so existing ``load_data()`` calls keep working.

    Returns:
        pandas.DataFrame holding the content of the requested sheet.
    """
    return pd.read_excel(DATA_URL, sheet_name=sheet_name)


st.header("Load and showcase data", divider="red")

# Temporary status line shown while the workbook is being fetched.
data_load_state = st.text('Loading data ...')
data = load_data()
data_load_state.text("")

col1, col2 = st.columns(2)
with col1:
    st.write("Here's the 50 first rows of the dataset")
    st.write(data.head(50))

with col2:
    if st.checkbox('Show metadata'):
        st.subheader("Meaning of each column")
        # Go through the cached loader instead of calling pd.read_excel
        # directly, so the sheet is not re-read on every rerun of the script.
        metadata = load_data(sheet_name='Documentation')
        pd.set_option('display.max_colwidth', None)
        st.write(metadata)
47
+
48
# Donut chart: share of checkouts that were late versus early / on time.
st.header("How often are drivers late for checkout?")

# Rows without a recorded checkout delay are left out: comparing NaN with a
# number yields False, which would silently count them as "in time".
checkout_delay = data["delay_at_checkout_in_minutes"].dropna()

# A delay of exactly 0 minutes is on time, so only strictly positive delays
# are counted as late.
late_share = (checkout_delay > 0).mean()

# Labels, values and colours are listed in the same fixed order so that the
# darker red is always attached to the "Late" slice.
fig = go.Figure(
    data=[
        go.Pie(
            labels=["Late", "In advance or in time"],
            values=[late_share, 1 - late_share],
            textinfo="percent",
            hole=.5,
        )
    ]
)
fig.update_traces(marker=dict(colors=['#D13838', '#FF9F9F']))
st.plotly_chart(fig)
54
+
55
+
56
st.header("Time Intervals and Delays")
col1, col2 = st.columns(2)
with col1:
    st.markdown("**Time Intervals between anticipated check-outs and next-check-in**")
    fig = px.histogram(
        data,
        x="time_delta_with_previous_rental_in_minutes",
        color_discrete_sequence=["indianred"],
    )
    # The height is set on the figure layout; it is not passed to
    # st.plotly_chart, where extra keyword arguments are not a reliable way
    # to size the figure.
    fig.update_layout(bargap=0.01, height=400)
    st.plotly_chart(fig, use_container_width=True)
with col2:
    st.markdown("**Delays in minutes**")
    # Keep only delays within three standard deviations of the mean so that
    # extreme values do not flatten the histogram.
    checkout_delay = data['delay_at_checkout_in_minutes']
    delay_mean = checkout_delay.mean()
    delay_std = checkout_delay.std()
    lower_bound = delay_mean - 3 * delay_std
    upper_bound = delay_mean + 3 * delay_std

    df = data[(checkout_delay > lower_bound) & (checkout_delay < upper_bound)]
    fig = px.histogram(df, x="delay_at_checkout_in_minutes", color_discrete_sequence=["indianred"])
    fig.update_layout(bargap=0.01, height=400)
    st.plotly_chart(fig, use_container_width=True)
73
+
74
st.subheader("Some data insights")

# Percentages shown in the three donuts. The first two are relative to all
# rentals, the third to canceled rentals only.
rental_count = len(data)
canceled_rentals = data[data['state'] == 'canceled']

state_perc = data["state"].value_counts() / rental_count * 100
checkin_perc = data["checkin_type"].value_counts() / rental_count * 100
canceled = (canceled_rentals['checkin_type'].value_counts() / len(canceled_rentals)) * 100

# One entry per donut: (subplot title, percentages, slice colours).
pie_specs = [
    ("State of rentals", state_perc, ["#E73636", "#FF9F9F"]),
    ("Checkin type", checkin_perc, ["#202EBD", "#13E7E3"]),
    ("Proportion of cancellation by checkin type", canceled, ["#202EBD", "#13E7E3"]),
]

pie = make_subplots(
    rows=1,
    cols=len(pie_specs),
    specs=[[{"type": "domain"} for _ in pie_specs]],
    shared_yaxes=True,
    subplot_titles=[title for title, _, _ in pie_specs],
)

# Add the donuts left to right, one per subplot column.
for column, (_, shares, palette) in enumerate(pie_specs, start=1):
    pie.add_trace(
        go.Pie(values=shares, labels=shares.index, marker_colors=palette),
        row=1,
        col=column,
    )

pie.update_traces(hole=0.4, textinfo="label+percent")
pie.update_layout(width=1200, showlegend=True)

st.plotly_chart(pie)
124
+
125
st.subheader("Quick analysis")
# The first bullet names the mobile check-in type: the third bullet states
# that connect is only 20% of rentals, so the 80% share cannot be connect.
# Each bullet starts at the beginning of its own line so Markdown renders a list.
st.markdown("""
* 80% of rentals are made via mobile checkin type.
* Around 15% of the overall rentals end up with cancellation.
* However, although connect checkin type only represents 20% of the rentals, we can underline that 25% of the cancellations come from connect checkin type.
* That highlights a bigger impact from this kind on rental flow over the cancellations.
""")

st.write("")
132
+
133
def _count_below(values, thresholds):
    """Count, for every threshold, the entries of ``values`` strictly below it.

    Args:
        values: pandas Series of numbers. Missing entries are ignored, which
            matches ``series < t`` evaluating to False for NaN.
        thresholds: 1-D array of thresholds.

    Returns:
        A list of Python ints, one count per threshold, in the same order.
    """
    ordered = np.sort(values.dropna().to_numpy())
    # side="left" gives the insertion point before any equal element, which is
    # exactly the number of entries strictly smaller than the threshold.
    return np.searchsorted(ordered, thresholds, side="left").tolist()


# Difference between delay at checkout and the delta with previous rental
data['minutes_passed_checkin_time'] = (
    data['delay_at_checkout_in_minutes'] - data['time_delta_with_previous_rental_in_minutes']
)

# Only rentals with a known delta to a previous rental are considered.
impacted_df = data[data["time_delta_with_previous_rental_in_minutes"].notna()]

st.header("How many impacted and solved rentals cases depending on threshold and scope ?")

# One threshold per minute from 0 to 720 (12 hours); the resulting lists are
# indexed directly by the threshold in minutes.
thresholds = np.arange(721)

# Impacted cases: rentals whose delta with the previous rental is below the
# threshold, overall and per check-in type.
impacted_delta = impacted_df['time_delta_with_previous_rental_in_minutes']
impacted_scope = impacted_df['checkin_type']
impacted_list_total = _count_below(impacted_delta, thresholds)
impacted_list_connect = _count_below(impacted_delta[impacted_scope == 'connect'], thresholds)
impacted_list_mobile = _count_below(impacted_delta[impacted_scope == 'mobile'], thresholds)

# Solved cases: rentals where the checkout delay exceeded the delta with the
# previous rental and the delay itself is below the threshold. The mask is
# built from impacted_df so that it is aligned with the frame it filters.
solved = impacted_df[impacted_df['minutes_passed_checkin_time'] > 0]
solved_delay = solved['delay_at_checkout_in_minutes']
solved_scope = solved['checkin_type']
solved_list_total = _count_below(solved_delay, thresholds)
solved_list_connect = _count_below(solved_delay[solved_scope == 'connect'], thresholds)
solved_list_mobile = _count_below(solved_delay[solved_scope == 'mobile'], thresholds)
168
+
169
+
170
# Thresholds in minutes used as the x axis of both charts (0 to 720).
x_values = list(range(721))


def _threshold_figure(title, y_title, counts_by_scope):
    """Build a line chart of case counts against the threshold in minutes.

    Args:
        title: Chart title.
        y_title: Label of the y axis.
        counts_by_scope: Mapping of trace name to a list of counts aligned
            with ``x_values``; traces are drawn in the mapping's order.

    Returns:
        plotly.graph_objects.Figure with one line per scope.
    """
    traces = [
        go.Scatter(x=x_values, y=counts, name=scope)
        for scope, counts in counts_by_scope.items()
    ]
    layout = go.Layout(
        title=title,
        # One tick every 60 minutes from 0 to 12h.
        xaxis=dict(title='Threshold in minutes', tickvals=list(range(0, 721, 60))),
        yaxis=dict(title=y_title),
        legend=dict(orientation='h', yanchor='bottom', xanchor='right', y=1.02, x=1),
        # The height is part of the figure itself; the width follows the
        # Streamlit column because use_container_width=True is used below.
        height=600,
    )
    return go.Figure(data=traces, layout=layout)


col1, col2 = st.columns(2)
with col1:
    fig = _threshold_figure(
        'Number of impacted cases by threshold',
        'Number of impacted cases',
        {
            'All cars': impacted_list_total,
            'Connect cars': impacted_list_connect,
            'Mobile cars': impacted_list_mobile,
        },
    )
    st.plotly_chart(fig, use_container_width=True)

with col2:
    fig = _threshold_figure(
        'Number of solved cases by threshold',
        'Number of cases solved',
        {
            'All cars': solved_list_total,
            'Connect cars': solved_list_connect,
            'Mobile cars': solved_list_mobile,
        },
    )
    st.plotly_chart(fig, use_container_width=True)
213
+
214
st.subheader("Graph analysis")
# Every bullet sits on its own line; a "*" in the middle of a line is not
# rendered as a list item by Markdown.
st.markdown("""
* The curve of solved cases tends to noticeably flatten out at around **120 minutes**, or even up to 180 minutes.
* We might be tempted to implement a much higher threshold in order to solve as many problem cases as possible.
* But we're faced with a twofold problem : the higher the threshold, the greater the impact on the number of cars available and obviously on our revenue.
* So we need to find the right balance between the number of problem cases solved and the proportion of revenue impacted.
* With this in mind, :red[**120 minutes**] threshold seems to be a good compromise for our business.
""")

st.write("")
221
st.header("Dynamic playground of threshold and scope effects")
st.markdown("You can here adjust the threshold and scope you desire to see the effects on data")

# Form collecting the threshold (in minutes) and the check-in scope; the
# values are only sent when the submit button is pressed.
with st.form("threshold_testing"):
    threshold = st.slider("Choose the threshold in minutes", 0, 720, 0)
    checkin_type = st.radio("Choose the desired checkin type", ["All", "Connect", "Mobile"])
    submit = st.form_submit_button("Let's check it out")

if submit:
    st.markdown(f"With a threshold of **{threshold}** and for **{checkin_type}** scope")

    # Per-scope (impacted, solved) count lists, both indexed by threshold.
    mobile_counts = (impacted_list_mobile, solved_list_mobile)
    counts_by_scope = {
        "All": (impacted_list_total, solved_list_total),
        "Connect": (impacted_list_connect, solved_list_connect),
        "Mobile": mobile_counts,
    }
    # Any scope other than "All" or "Connect" falls back to the mobile counts.
    impacted_counts, solved_counts = counts_by_scope.get(checkin_type, mobile_counts)

    st.metric("The number of cases impacted is :", impacted_counts[threshold])
    st.metric("The number of cases solved is :", solved_counts[threshold])
241
+
242
+