# app.py
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from xgboost import XGBClassifier
st.title("🧠 Sleep Event Prediction")
# --- Load and preprocess data ---
merged_df = pd.read_csv("merged_df.csv")
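# Note: Streamlit reruns this entire script on every widget interaction, so the CSV is
# reloaded and the model below is retrained each time; wrapping these steps in
# st.cache_data / st.cache_resource would avoid the repeated work.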
st.subheader("Raw Data Sample")
st.dataframe(merged_df.head())
# Drop nulls in important columns
merged_df = merged_df.dropna(subset=['night', 'event', 'event_timestamp'])
# Convert timestamps
merged_df['event_timestamp'] = pd.to_datetime(merged_df['event_timestamp'], format='%Y-%m-%dT%H:%M:%S%z', utc=True)
merged_df['sensor_timestamp'] = pd.to_datetime(merged_df['sensor_timestamp'], format='%Y-%m-%dT%H:%M:%S%z', utc=True)
# Calculate duration
merged_df['sleep_duration_hrs'] = (merged_df['sensor_timestamp'] - merged_df['event_timestamp']).dt.total_seconds() / 3600
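# sleep_duration_hrs is derived for inspection only; it is not included in the model features below.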
# Encode categorical columns
le_event = LabelEncoder()
merged_df['event_encoded'] = le_event.fit_transform(merged_df['event'])
le_series = LabelEncoder()
merged_df['series_id_encoded'] = le_series.fit_transform(merged_df['series_id'])
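# series_id_encoded is likewise not used as a model feature; only anglez and enmo feed the model below.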
# Select features
X = merged_df[['anglez', 'enmo']]
y = merged_df['event_encoded']
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
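# Scaling is not strictly required for tree-based XGBoost, but it keeps the pipeline
# consistent if a scale-sensitive model is swapped in later.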
# Train model (use_label_encoder is deprecated in recent XGBoost releases and is omitted)
model = XGBClassifier(eval_metric='logloss')
model.fit(X_train_scaled, y_train)
# Evaluate model
y_pred = model.predict(X_test_scaled)
y_proba = model.predict_proba(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')
# Handle binary or multiclass AUC
if y_proba.shape[1] == 2:
    roc = roc_auc_score(y_test, y_proba[:, 1])
else:
    roc = roc_auc_score(y_test, y_proba, multi_class='ovo', average='macro')
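# Macro averaging weights every event class equally, and the one-vs-one ('ovo') AUC is
# averaged over class pairs, so both metrics are less sensitive to class imbalance.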
# --- Predict User Input ---
st.subheader("🔮 Predict Sleep Event")
anglez = st.number_input("Enter anglez:", value=27.88, format="%.4f")
enmo = st.number_input("Enter enmo:", value=0.00, format="%.4f")
if st.button("Predict Sleep Event"):
    # Build the input with the same column names used to fit the scaler, so
    # scikit-learn does not warn about missing feature names.
    input_data = pd.DataFrame([[anglez, enmo]], columns=['anglez', 'enmo'])
    input_scaled = scaler.transform(input_data)
    prediction = model.predict(input_scaled)[0]
    predicted_label = le_event.inverse_transform([prediction])[0]
    st.success(f"Predicted Sleep Event: {predicted_label}")
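    # Optional sketch: also surface the per-class probabilities for this input. This assumes
    # le_event.classes_ lines up with the predict_proba columns, which holds here because the
    # encoded labels are exactly 0..n_classes-1.
    proba = model.predict_proba(input_scaled)[0]
    proba_df = pd.DataFrame({"Event": le_event.classes_, "Probability": proba})
    st.dataframe(proba_df)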
# --- Display metrics ---
st.subheader("Model Performance")
metrics_df = pd.DataFrame({
    "Metric": ["Accuracy", "F1 Score", "ROC AUC Score"],
    "Value": [f"{accuracy:.4f}", f"{f1:.4f}", f"{roc:.4f}"]
})
st.table(metrics_df)
# Display class balance
st.subheader("Event Value Counts")
value_counts_df = merged_df["event"].value_counts().reset_index()
value_counts_df.columns = ["Event", "Count"]
st.dataframe(value_counts_df)
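# Minimal sketch: a quick bar chart of the class balance using Streamlit's built-in chart.
st.bar_chart(value_counts_df.set_index("Event")["Count"])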