varalakshmi55 committed on
Commit
ea9f040
·
verified ·
1 Parent(s): dd17fed

Upload 2 files

Browse files
Files changed (2) hide show
  1. pages/Dashboard.py +338 -0
  2. pages/predict page.py +204 -0
pages/Dashboard.py ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ from Utility.data_loader import load_train_series,load_train_events,load_sample_submission,load_test_series
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
8
+ from xgboost import XGBClassifier # or XGBRegressor depending on your task
9
+ import xgboost as xgb
10
+ import numpy as np
11
+
12
@st.cache_data
def load_sampled_data():
    """Return the event annotations read from ``train_events.csv``.

    The function is wrapped in ``st.cache_data``. Reading and sampling of the
    train/test sensor-series parquet files is currently disabled, so the
    events table is the only value returned.
    """
    return pd.read_csv("train_events.csv")
23
+
24
# Event annotations. Loading the raw sensor series and merging them on the
# fly is disabled; the pre-merged CSV below is used instead.
df2 = load_sampled_data()

merged_df = pd.read_csv("merged_df.csv")

# Give the suffixed timestamp columns (presumably left over from a pandas
# merge) descriptive names, but only when those columns are present.
for suffixed_name, descriptive_name in (
    ('timestamp_x', 'sensor_timestamp'),
    ('timestamp_y', 'event_timestamp'),
):
    if suffixed_name in merged_df.columns:
        merged_df.rename(columns={suffixed_name: descriptive_name}, inplace=True)
37
+
38
st.title("📊 Step Distribution Analysis")

# Two equal-width columns: chart on the left, commentary on the right.
col1, col2 = st.columns([1, 1])

# ----- Column 1: Boxplot -----
with col1:
    st.subheader("📦 Boxplot of Step")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.boxplot(x=df2['step'], ax=ax, color='steelblue')
    ax.set_title("Distribution of Step Count", fontsize=14)
    ax.set_xlabel("Step", fontsize=12)
    st.pyplot(fig)
    # pyplot keeps a reference to every figure until it is closed; release it
    # so repeated script reruns do not accumulate open figures.
    plt.close(fig)

# ----- Column 2: Insights -----
with col2:
    st.subheader("🧠 Insights from the Boxplot")
    # NOTE(review): the "Distribution Shape" text calls the data left-skewed
    # while also describing mostly low values with a few very high outliers
    # (which reads as right skew) - confirm against the rendered plot.
    st.markdown("""
<small>
<b>Central Tendency:</b><br>
- The <b>median</b> is close to the center of the box, suggesting a fairly symmetric distribution within the interquartile range (IQR).<br>
<b>Spread:</b><br>
- A <b>wide IQR</b> indicates significant variability in the step counts across sessions.<br>
<b>Outliers:</b><br>
- The <b>dots on the right</b> are outliers — representing very high step counts.<br>
- These could reflect either:<br>
- <b>Legitimate long-duration recordings</b><br>
- Or <b>data quality issues</b> (e.g., duplication or sensor errors)<br>
<b>Distribution Shape:</b><br>
- A <b>longer left whisker</b> implies a <b>left-skewed</b> distribution.<br>
- Most sessions have <b>lower step values</b>, with a few very high outliers.
</small>
""", unsafe_allow_html=True)
70
+
71
+
72
# The scatter plot uses every merged row; swap in
# merged_df.sample(n=50000) here to downsample if rendering gets slow.
df_sample = merged_df

st.subheader("Scatter Plot: anglez vs enmo")

col1, col2 = st.columns([1, 1])

# Left column: the chart.
with col1:
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.scatterplot(x='anglez', y='enmo', data=df_sample, ax=ax)
    ax.set_title("Scatter Plot: anglez vs enmo")
    st.pyplot(fig)
    # Release the figure once Streamlit has rendered it so reruns do not
    # pile up open pyplot figures.
    plt.close(fig)

# Right column: static commentary on the chart.
with col2:
    st.markdown("""
<small>
<b>1. Clustered Points:</b> Most `enmo` values are near 0, suggesting low movement.<br>
<b>2. Symmetry:</b> Spread is balanced on both sides of anglez (±), indicating no directional bias.<br>
<b>3. Weak Correlation:</b> No visible trend, suggesting independence between `anglez` and `enmo`.<br>
<b>4. Outliers:</b> A few high `enmo` points may indicate sudden or intense movement.<br>
<b>5. Interpretation:</b> Most data reflects light activity or rest, regardless of body orientation.
</small>
""", unsafe_allow_html=True)
105
+
106
+
107
+ # df_sample = merged_df.sample(n=10000) # adjust sample size for performance
108
+
109
+ # # Subheader
110
+ # st.subheader("Pair Plot of Features")
111
+
112
+ # # Create pairplot
113
+ # fig = sns.pairplot(df_sample[['anglez', 'enmo', 'step']])
114
+ # fig.fig.suptitle("Pair Plot of Features", y=1.02)
115
+
116
+ # # Display in Streamlit
117
+ # st.pyplot(fig)
118
+ # Define columns to plot
119
+
120
col1, col2 = st.columns([1, 1])  # Equal width

# Column 1: Pair Plot
with col1:
    st.subheader("📈 Pair Plot of Features")
    pair_grid = sns.pairplot(merged_df[['anglez', 'enmo', 'step']])
    # pairplot returns a seaborn PairGrid, not a matplotlib Figure; hand
    # st.pyplot the underlying Figure, then release it so reruns do not
    # accumulate open figures.
    st.pyplot(pair_grid.figure)
    plt.close(pair_grid.figure)

# Column 2: Insights
with col2:
    st.subheader("🧠 Insights from Pair Plot")
    # NOTE(review): the magnifying-glass emoji below was reconstructed from a
    # mis-decoded byte sequence - confirm against the original file.
    st.markdown("""
<div style='font-size: 14px'>

### 📊 Distribution Insights:
- **anglez**: Symmetric distribution peaking near -50 to 0.
- **enmo**: Right-skewed, most values below 0.1.
- **step**: Right-skewed, with a few large outliers.

### 🔍 Pairwise Relationships:
- **anglez vs enmo**: No clear trend; cone-like shape.
- **anglez vs step**: No correlation; looks uniformly scattered.
- **enmo vs step**: Clustered at low values. High steps sometimes with low enmo.

### 💡 Summary:
- Features appear largely **uncorrelated**.
- Helps identify **data distributions** and potential **outliers**.
- Can assist in **feature selection/engineering**.

</div>
""", unsafe_allow_html=True)
151
+
152
+ # plot_columns = ['anglez', 'enmo', 'step']
153
+
154
+ # # Safety check: make sure required columns exist
155
+ # if all(col in merged_df.columns for col in plot_columns):
156
+
157
+ # # Check data size and sample accordingly
158
+ # max_rows = len(merged_df)
159
+ # sample_size = min(10000, max_rows) # Don't exceed available rows
160
+
161
+ # df_sample = merged_df.sample(n=sample_size)
162
+
163
+ # # Subheader
164
+ # st.subheader("Pair Plot of Features")
165
+
166
+ # # Create pairplot
167
+ # fig = sns.pairplot(df_sample[plot_columns])
168
+ # fig.fig.suptitle("Pair Plot of Features", y=1.02)
169
+
170
+ # # Display in Streamlit
171
+ # st.pyplot(fig)
172
+
173
+ # else:
174
+ # st.error("One or more required columns ('anglez', 'enmo', 'step') are missing in the dataset.")
175
+
176
+
177
# Side-by-side histograms (with a KDE overlay) of the two sensor features.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

sns.histplot(df_sample['anglez'], kde=True, bins=50, ax=axes[0])
axes[0].set_title("Distribution of anglez")

sns.histplot(df_sample['enmo'], kde=True, bins=50, ax=axes[1])
axes[1].set_title("Distribution of enmo")

# Lay out this specific figure rather than whatever pyplot considers current.
fig.tight_layout()
st.pyplot(fig)
# Release the figure after rendering so reruns do not leak open figures.
plt.close(fig)

# Show insights side by side
col1, col2 = st.columns(2)

with col1:
    st.markdown("""
<div style='font-size: 14px'>
<h3> 📈 Distribution of `anglez`: </h3>
- The distribution is **roughly symmetric**, centered around **-50 to 0**.
- It resembles a **left-heavy bell shape**, suggesting:
- Most sensor angles were **tilted negatively**.
- Indicates a **natural resting position** or specific posture.
</div>
""", unsafe_allow_html=True)

with col2:
    st.markdown("""
<div style='font-size: 14px'>
<h3> 📉 Distribution of `enmo`: </h3>
- Highly **right-skewed** (sharp peak near zero).
- The majority of `enmo` values are **very small** (< 0.05), indicating:
- **Minimal movement or low activity** in most sessions.
- Few data points reflect **moderate to high movement**.
</div>
""", unsafe_allow_html=True)
213
+
214
+
215
+
216
+ # st.write("Multicollinearity Check - Correlation Matrix")
217
+ # features = ['anglez', 'enmo', 'step', 'night']
218
+ # df_subset = merged_df[features]
219
+
220
+ # # Streamlit title
221
+ # st.subheader("Multicollinearity Check - Correlation Matrix")
222
+
223
+ # # Calculate correlation matrix
224
+ # corr_matrix = df_subset.corr()
225
+
226
+ # # Plot heatmap
227
+ # fig, ax = plt.subplots(figsize=(6, 4))
228
+ # sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=ax)
229
+ # ax.set_title("Correlation Matrix")
230
+
231
+ # # Display in Streamlit
232
+ # st.pyplot(fig)
233
+
234
+
235
st.subheader("Multicollinearity Check - Correlation Matrix")

# Pairwise correlation of the selected columns.
features = ['anglez', 'enmo', 'step', 'night']
df_subset = merged_df[features]
corr_matrix = df_subset.corr()

# Annotated heatmap with the colour scale centred on zero.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, fmt=".3f", ax=ax)
ax.set_title("Correlation Matrix")

# Layout in two columns
col1, col2 = st.columns(2)

# Column 1: Heatmap
with col1:
    st.pyplot(fig)
    # Release the figure after rendering so reruns do not leak open figures.
    plt.close(fig)

# Column 2: Textual Insights
with col2:
    # NOTE(review): the coefficients quoted below are static text, not read
    # from corr_matrix - keep them in sync with the data. The magnifying-glass
    # and memo emoji were reconstructed from mis-decoded bytes; confirm them
    # against the original file.
    st.markdown("""
### 🔍 Insights from Correlation Matrix

- **`anglez` & `enmo`**:
🔸 Weak negative correlation (**-0.11**) — suggests minimal linear relationship.

- **`step` & `night`**:
⚠️ Perfect correlation (**1.00**) — indicates **redundancy**, likely representing the same event in different forms.

- **Overall**:
✅ Low multicollinearity across most features — safe for modeling.
📝 Recommend removing either `step` or `night` to reduce feature duplication.
""")
271
+
272
+
273
# Encode the two categorical columns as integer codes, in place on merged_df.
# Each column gets its own encoder so one fit does not overwrite the other's
# class mapping; `le` keeps the event mapping, as it did before.
merged_df['series_id'] = LabelEncoder().fit_transform(merged_df['series_id'])
le = LabelEncoder()
merged_df['event'] = le.fit_transform(merged_df['event'])

# Drop timestamp columns and the columns deliberately excluded from the model.
drop_cols = ['sensor_timestamp', 'event_timestamp', 'night', 'step', 'sleep_duration_hrs', 'series_id']
df_cleaned = merged_df.drop(columns=[col for col in drop_cols if col in merged_df.columns])

# Ensure only numeric features in X
X = df_cleaned.drop(columns='event').select_dtypes(include=[np.number])
y = merged_df['event']

# Hold out 20% of the rows; only the training part is used on this page and
# no feature scaling is applied.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=27)

# A single heading for the section (previously two headings were rendered
# back to back for the same chart).
st.subheader("XGBoost Feature Importance")

# Fit a classifier purely to read its feature-importance scores.
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train, y_train)

# Plot feature importance
fig, ax = plt.subplots(figsize=(6, 4))
xgb.plot_importance(xgb_model, ax=ax)
ax.set_title("XGBoost Feature Importance")

col1, col2 = st.columns(2)

# Column 1: Plot
with col1:
    st.pyplot(fig)
    # Release the figure after rendering so reruns do not leak open figures.
    plt.close(fig)
    # NOTE(review): the magnifying-glass emoji in the two markdown blocks below
    # was reconstructed from mis-decoded bytes - confirm against the original.
    st.markdown("""
#### 🚫 Low-Impact Features:
- Features like `step` and `night` (excluded in this plot) showed **minimal or redundant contribution**.
- 🔍 You may consider **removing** them to simplify the model.
""")

# Column 2: Insights
with col2:
    # NOTE(review): the importance scores quoted below are static text, not
    # read from xgb_model - keep them in sync with the fitted model.
    st.markdown("""
<small>
<h3> 🔍 XGBoost Feature Importance: Key Insights </h3>

#### 📌 Top Features:
- 🔹 **`anglez`** — Highest importance score (**1557**)
- 🔹 **`enmo`** — Close second with score (**1546**)

#### ✅ Summary:
- Both `anglez` and `enmo` contribute **significantly** to the model.
- Their high scores reflect **strong influence** in predicting the target variable.

#### 💡 Interpretation:
- These features likely capture **activity level** or **sleep posture** patterns.
- Keeping both is **recommended** for accurate classification.
</small>

""", unsafe_allow_html=True)
336
+
337
+
338
+
pages/predict page.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import numpy as np
5
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
8
+ from xgboost import XGBClassifier
9
+
10
+
11
st.title("🧠 Sleep Event Prediction")

MERGED_CSV_PATH = "merged_df.csv"
FEATURE_COLUMNS = ['anglez', 'enmo']
TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%S%z'


@st.cache_data
def _read_merged_csv(csv_path):
    """Read the merged sensor/event CSV, cached per ``csv_path``."""
    return pd.read_csv(csv_path)


@st.cache_resource
def _train_sleep_event_model(csv_path):
    """Prepare the data, then train and score the sleep-event classifier.

    Streamlit re-executes the page script on every widget interaction, so
    without caching the CSV is re-read and the model re-trained each time an
    input changes or the button is pressed. The result is cached per
    ``csv_path``; the cache is not refreshed when the file changes on disk.

    Args:
        csv_path: Path of the merged sensor/event CSV.

    Returns:
        A dict with the prepared ``frame``, the fitted ``le_event``,
        ``le_series``, ``scaler`` and ``model``, and the hold-out metrics
        ``accuracy``, ``f1`` and ``roc``. The objects are shared between
        reruns and should be treated as read-only.
    """
    # Drop nulls in important columns (dropna returns a new frame).
    frame = _read_merged_csv(csv_path).dropna(
        subset=['night', 'event', 'event_timestamp']
    )

    # Convert timestamps
    for column in ('event_timestamp', 'sensor_timestamp'):
        frame[column] = pd.to_datetime(frame[column], format=TIMESTAMP_FORMAT, utc=True)

    # Hours between the sensor reading and the event timestamp.
    frame['sleep_duration_hrs'] = (
        frame['sensor_timestamp'] - frame['event_timestamp']
    ).dt.total_seconds() / 3600

    # Encode categorical columns
    le_event = LabelEncoder()
    frame['event_encoded'] = le_event.fit_transform(frame['event'])

    le_series = LabelEncoder()
    frame['series_id_encoded'] = le_series.fit_transform(frame['series_id'])

    # Select features
    X = frame[FEATURE_COLUMNS]
    y = frame['event_encoded']

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Scale features: fit on the training part only, then apply to both.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train model
    model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
    model.fit(X_train_scaled, y_train)

    # Evaluate model
    y_pred = model.predict(X_test_scaled)
    y_proba = model.predict_proba(X_test_scaled)

    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')

    # Handle binary or multiclass AUC
    if y_proba.shape[1] == 2:
        roc = roc_auc_score(y_test, y_proba[:, 1])
    else:
        roc = roc_auc_score(y_test, y_proba, multi_class='ovo', average='macro')

    return {
        "frame": frame,
        "le_event": le_event,
        "le_series": le_series,
        "scaler": scaler,
        "model": model,
        "accuracy": accuracy,
        "f1": f1,
        "roc": roc,
    }


# --- Load and preprocess data ---
st.subheader("Raw Data Sample")
st.dataframe(_read_merged_csv(MERGED_CSV_PATH).head())

_trained = _train_sleep_event_model(MERGED_CSV_PATH)

# Re-expose the names the rest of the page reads.
merged_df = _trained["frame"]
le_event = _trained["le_event"]
le_series = _trained["le_series"]
scaler = _trained["scaler"]
model = _trained["model"]
accuracy = _trained["accuracy"]
f1 = _trained["f1"]
roc = _trained["roc"]
63
+
64
+
65
+
66
+
67
# --- Predict User Input ---
st.subheader("🔮 Predict Sleep Event")
anglez = st.number_input("Enter anglez:", value=27.88, format="%.4f")
enmo = st.number_input("Enter enmo:", value=0.00, format="%.4f")

if st.button("Predict Sleep Event"):
    # The scaler was fitted on a DataFrame with the columns 'anglez' and
    # 'enmo'; pass the input with the same column names so scikit-learn does
    # not warn about missing feature names.
    input_data = pd.DataFrame([[anglez, enmo]], columns=['anglez', 'enmo'])
    input_scaled = scaler.transform(input_data)
    prediction = model.predict(input_scaled)[0]
    # Map the integer class code back to the original event label.
    predicted_label = le_event.inverse_transform([prediction])[0]
    st.success(f"Predicted Sleep Event: {predicted_label}")
78
+
79
+
80
+ # # app.py (your Streamlit file)
81
+ # import streamlit as st
82
+ # import numpy as np
83
+ # # import pickle
84
+ # from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
85
+ # import pandas as pd
86
+ # from sklearn.preprocessing import LabelEncoder,StandardScaler
87
+ # from sklearn.model_selection import train_test_split
88
+ # from xgboost import XGBClassifier
89
+
90
+ # st.title("๐Ÿง  Sleep Event Prediction")
91
+
92
+ # # --- Load Pickles ---
93
+ # # @st.cache_resource
94
+ # # def load_all():
95
+ # # with open("model.pkl", "rb") as f: model = pickle.load(f)
96
+ # # with open("scaler.pkl", "rb") as f: scaler = pickle.load(f)
97
+ # # with open("label_encoder.pkl", "rb") as f: le = pickle.load(f)
98
+ # # with open("X_test.pkl", "rb") as f: X_test = pickle.load(f)
99
+ # # with open("y_test.pkl", "rb") as f: y_test = pickle.load(f)
100
+ # # return model, scaler, le, X_test, y_test
101
+
102
+ # merged_df=pd.read_csv("merged_df.csv")
103
+ # st.dataframe(merged_df.head())
104
+ # # Step 1: Drop rows with nulls in key columns
105
+ # merged_df = merged_df.dropna(subset=['night', 'event', 'event_timestamp'])
106
+
107
+ # # Step 2: Reset index (also avoid inplace)
108
+ # merged_df = merged_df.reset_index(drop=True)
109
+ # merged_df['event_timestamp'] = pd.to_datetime(merged_df['event_timestamp'], format='%Y-%m-%dT%H:%M:%S%z',utc=True)
110
+ # merged_df['sensor_timestamp'] = pd.to_datetime(merged_df['sensor_timestamp'], format='%Y-%m-%dT%H:%M:%S%z',utc=True)
111
+ # merged_df['sleep_duration_hrs'] = (merged_df['sensor_timestamp'] - merged_df['event_timestamp']).dt.total_seconds() / 3600
112
+
113
+ # le = LabelEncoder()
114
+ # merged_df['series_id'] = le.fit_transform(merged_df['series_id'])
115
+ # merged_df['event'] = le.fit_transform(merged_df['event']) # Target label
116
+
117
+ # # columns_to_drop = ['sensor_timestamp', 'series_id', 'event_timestamp','night','sleep_duration_hrs','step']
118
+
119
+ # # Drop specified columns and define features (X) and target (y)
120
+ # # df_cleaned = merged_df.drop([col for col in columns_to_drop if col in merged_df.columns], axis=1)
121
+
122
+ # # X = df_cleaned.drop('event', axis=1)
123
+ # # y = df_cleaned['event']
124
+
125
+ # X = merged_df[['anglez', 'enmo']]
126
+ # y = merged_df['event']
127
+
128
+ # # Train-test split
129
+ # X_train, X_test, y_train, y_test = train_test_split(
130
+ # X, y, test_size=0.2
131
+ # )
132
+
133
+ # # 6. Scale features (optional for XGBoost but good practice)
134
+ # scaler = StandardScaler()
135
+ # X_train_scaled = scaler.fit_transform(X_train)
136
+ # X_test_scaled = scaler.transform(X_test)
137
+
138
+ # # 7. Train XGBoost model
139
+ # # model = XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1, reg_alpha=1, reg_lambda=1, eval_metric='logloss')
140
+ # model = XGBClassifier()
141
+ # model.fit(X_train_scaled, y_train)
142
+
143
+ # # 8. Predict and Evaluate
144
+ # y_pred = model.predict(X_test_scaled)
145
+ # y_proba = model.predict_proba(X_test_scaled)
146
+
147
+ # accuracy = accuracy_score(y_test, y_pred)
148
+ # f1 = f1_score(y_test, y_pred, average='macro')
149
+
150
+ # if y_proba.shape[1] == 2:
151
+ # roc = roc_auc_score(y_test, y_proba[:, 1])
152
+ # else:
153
+ # roc = roc_auc_score(y_test, y_proba, multi_class='ovo', average='macro')
154
+
155
+
156
+ # # --- Display Metrics ---
157
+ # # st.subheader("Model Performance")
158
+ # # st.metric("Accuracy", f"{accuracy:.4f}")
159
+ # # st.metric("F1 Score", f"{f1:.4f}")
160
+ # # st.metric("ROC AUC Score", f"{roc:.4f}")
161
+
162
+ # # Create a DataFrame for metrics
163
+ # # import pandas as pd
164
+
165
+ # st.subheader("Model Performance")
166
+
167
+ # # Create a DataFrame for metrics
168
+ # metrics_df = pd.DataFrame({
169
+ # "Metric": ["Accuracy", "F1 Score", "ROC AUC Score"],
170
+ # "Value": [f"{accuracy:.4f}", f"{f1:.4f}", f"{roc:.4f}"]
171
+ # })
172
+
173
+ # # Display as table
174
+ # st.table(metrics_df)
175
+
176
+ # counts = merged_df["event"].value_counts()
177
+ # st.markdown("**Event Value Counts:**")
178
+ # st.markdown(counts.to_string())
179
+
180
+ # # --- Predict User Input ---
181
+ # st.subheader("Predict Sleep Event")
182
+ # anglez = st.number_input("Enter anglez:", value=27.8800,format="%.4f")
183
+ # enmo = st.number_input("Enter enmo:", value=0.0000,format="%.4f")
184
+
185
+ # if st.button("Predict Sleep Event"):
186
+ # input_data = np.array([[anglez, enmo]])
187
+ # input_scaled = scaler.transform(input_data)
188
+ # prediction = model.predict(input_scaled)[0]
189
+ # label = le.inverse_transform([prediction])[0]
190
+ # st.success(f"Predicted Event: {label}")
191
+ # Display class balance
192
+
193
# Hold-out metrics, formatted to four decimals and shown as a static table.
st.subheader("📊 Model Performance")
metrics_df = pd.DataFrame({
    "Metric": ["Accuracy", "F1 Score", "ROC AUC Score"],
    "Value": [f"{accuracy:.4f}", f"{f1:.4f}", f"{roc:.4f}"],
})
st.table(metrics_df)

# Class balance: number of rows per event label.
st.subheader("📈 Event Value Counts")
value_counts_df = merged_df["event"].value_counts().reset_index()
value_counts_df.columns = ["Event", "Count"]
st.dataframe(value_counts_df)