# app.py
"""Streamlit app: train an XGBoost classifier on sleep sensor data and predict.

The script reads ``merged_df.csv``, fits a classifier that maps the ``anglez``
and ``enmo`` sensor readings to the ``event`` label, reports hold-out metrics,
and lets the user score a single reading interactively.

Streamlit re-executes this file from top to bottom on every widget
interaction, so the expensive steps (reading the CSV, fitting the model) live
in ``st.cache_resource`` functions and run once per server process instead of
once per click.
"""
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from xgboost import XGBClassifier

DATA_PATH = "merged_df.csv"
TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S%z"
FEATURE_COLUMNS = ["anglez", "enmo"]
REQUIRED_COLUMNS = ["night", "event", "event_timestamp"]


@st.cache_resource
def load_raw_data(path):
    """Read the CSV at *path* once and share the frame across reruns.

    The returned DataFrame is the cached object itself, not a copy; callers
    must treat it as read-only.
    """
    return pd.read_csv(path)


def preprocess(raw_df):
    """Return a cleaned copy of *raw_df* and the fitted event ``LabelEncoder``.

    Rows missing any of ``REQUIRED_COLUMNS`` are dropped, both timestamp
    columns are parsed as UTC, and the categorical columns are label-encoded.
    ``dropna`` returns a new frame, so *raw_df* is not modified.
    """
    df = raw_df.dropna(subset=REQUIRED_COLUMNS)

    for column in ("event_timestamp", "sensor_timestamp"):
        df[column] = pd.to_datetime(df[column], format=TIMESTAMP_FORMAT, utc=True)

    # NOTE: sleep_duration_hrs and series_id_encoded are derived here but are
    # not model inputs; only FEATURE_COLUMNS are used for training.
    df["sleep_duration_hrs"] = (
        df["sensor_timestamp"] - df["event_timestamp"]
    ).dt.total_seconds() / 3600

    le_event = LabelEncoder()
    df["event_encoded"] = le_event.fit_transform(df["event"])
    le_series = LabelEncoder()
    df["series_id_encoded"] = le_series.fit_transform(df["series_id"])

    return df, le_event


def compute_roc_auc(y_true, y_proba):
    """Return ROC AUC, using macro one-vs-one when there are more than two classes."""
    if y_proba.shape[1] == 2:
        return roc_auc_score(y_true, y_proba[:, 1])
    return roc_auc_score(y_true, y_proba, multi_class="ovo", average="macro")


@st.cache_resource
def build_pipeline(path):
    """Train and evaluate the classifier for the data at *path*.

    Returns a dict with the fitted ``scaler``, ``model`` and ``label_encoder``,
    the hold-out ``accuracy``, ``f1`` and ``roc`` scores, and ``value_counts``
    (a two-column Event/Count frame).

    The result is cached on *path* only: if the file's contents change, clear
    the Streamlit cache or restart the server to retrain.
    """
    df, le_event = preprocess(load_raw_data(path))

    # Plain arrays are used for fitting so that the single-row array built
    # from user input matches what the scaler was fitted on (no feature-name
    # mismatch warning from scikit-learn).
    X = df[FEATURE_COLUMNS].to_numpy()
    y = df["event_encoded"]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = XGBClassifier(use_label_encoder=False, eval_metric="logloss")
    model.fit(X_train_scaled, y_train)

    y_pred = model.predict(X_test_scaled)
    y_proba = model.predict_proba(X_test_scaled)

    value_counts_df = df["event"].value_counts().reset_index()
    value_counts_df.columns = ["Event", "Count"]

    return {
        "scaler": scaler,
        "model": model,
        "label_encoder": le_event,
        "accuracy": accuracy_score(y_test, y_pred),
        "f1": f1_score(y_test, y_pred, average="macro"),
        "roc": compute_roc_auc(y_test, y_proba),
        "value_counts": value_counts_df,
    }


st.title("🧠 Sleep Event Prediction")

# --- Load data and train (cached after the first run) ---
try:
    raw_df = load_raw_data(DATA_PATH)
except FileNotFoundError:
    st.error(f"Data file not found: {DATA_PATH}")
    st.stop()

st.subheader("Raw Data Sample")
st.dataframe(raw_df.head())

artifacts = build_pipeline(DATA_PATH)

# --- Predict User Input ---
st.subheader("🔮 Predict Sleep Event")
anglez = st.number_input("Enter anglez:", value=27.88, format="%.4f")
enmo = st.number_input("Enter enmo:", value=0.00, format="%.4f")

if st.button("Predict Sleep Event"):
    input_data = np.array([[anglez, enmo]])
    input_scaled = artifacts["scaler"].transform(input_data)
    prediction = artifacts["model"].predict(input_scaled)[0]
    predicted_label = artifacts["label_encoder"].inverse_transform([prediction])[0]
    st.success(f"Predicted Sleep Event: {predicted_label}")

# --- Display metrics ---
st.subheader("📊 Model Performance")
metrics_df = pd.DataFrame({
    "Metric": ["Accuracy", "F1 Score", "ROC AUC Score"],
    "Value": [
        f"{artifacts['accuracy']:.4f}",
        f"{artifacts['f1']:.4f}",
        f"{artifacts['roc']:.4f}",
    ],
})
st.table(metrics_df)

# --- Display class balance ---
st.subheader("📈 Event Value Counts")
st.dataframe(artifacts["value_counts"])