Spaces:

iaravagni
/

BloodGlucosePrediction

Running

iaravagni

update

40daefb about 2 months ago

18 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import os
	import joblib
	from sklearn.metrics import root_mean_squared_error

	from scripts.make_dataset import create_features
	from scripts.naive_approach import get_column_specs, prepare_data, zeroshot_eval, simple_diagonal_averaging
	from scripts.ml_approach import format_dataset

	# Window sizes passed to format_dataset / simple_diagonal_averaging further down this script.
	# NOTE(review): presumably counted in samples (history length / forecast horizon) - confirm.
	CONTEXT_LENGTH = 52
	PREDICTION_LENGTH = 6

	# Page-level configuration: browser tab title, icon, and a full-width layout
	st.set_page_config(page_title="Glucose Level Prediction App", page_icon="📊", layout="wide")

	# Stylesheet with the green (#58A618) accent; kept in a named constant so the
	# injection call below stays a one-liner
	_THEME_CSS = """
	<style>
	/* Primary accent color */
	.stButton button, .stSelectbox, .stMultiselect, .stSlider, .stNumberInput {
	border-color: #58A618 !important;
	}
	.stProgress .st-bo {
	background-color: #58A618 !important;
	}
	.st-bq {
	color: #58A618 !important;
	}
	/* Header styling */
	h1, h2, h3 {
	color: #58A618 !important;
	}
	.stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
	color: #58A618 !important;
	}
	/* Success messages */
	.element-container .stAlert.st-ae.st-af {
	border-color: #58A618 !important;
	color: #58A618 !important;
	}
	/* Link color */
	a {
	color: #58A618 !important;
	}
	/* Button color */
	.stButton>button {
	background-color: #58A618 !important;
	color: white !important;
	}
	/* Make background white */
	.stApp {
	background-color: white !important;
	}
	/* Tab styling */
	.stTabs [data-baseweb="tab-list"] {
	gap: 10px;
	}
	.stTabs [data-baseweb="tab"] {
	background-color: #f0f0f0;
	border-radius: 4px 4px 0 0;
	padding: 10px 16px;
	border: 1px solid #ccc;
	}
	.stTabs [data-baseweb="tab"][aria-selected="true"] {
	background-color: white;
	border-bottom: 3px solid #58A618;
	}
	</style>
	"""

	# Raw HTML has to be explicitly allowed for the <style> tag to be emitted
	st.markdown(_THEME_CSS, unsafe_allow_html=True)

	# Seed the session keys this script reads later; values already present from an
	# earlier rerun are left untouched
	_SESSION_DEFAULTS = {
	    'combined_data': None,
	    'files_uploaded': False,
	    'data_processed': False,
	}
	for _state_key, _initial_value in _SESSION_DEFAULTS.items():
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _initial_value

	# Page heading followed by a short introduction
	st.title("Glucose Level Prediction App")
	_INTRO_TEXT = """
	This app allows you to upload glucose measurements, food logs, and accelerometer data
	to analyze patterns and predict glucose levels.
	"""
	st.markdown(_INTRO_TEXT)

	# The user either uploads their own files or picks one of the two samples
	st.subheader("Choose Data Source")
	_SOURCE_CHOICES = ("Upload files", "Sample A", "Sample B")
	data_option = st.selectbox("Select how you'd like to provide input data:", _SOURCE_CHOICES)

	# Nothing is loaded yet on this run; the data-source branches below fill these in
	glucose_data = food_data = accel_data = combined_data = None
	show_tabs = False

	# Help text shown under "Expected File Formats:", one entry per uploader
	_GLUCOSE_FORMAT_HELP = """
	Glucose Levels CSV:
	- Timestamp column
	- Glucose measurement values
	"""
	_FOOD_FORMAT_HELP = """
	Food Logs CSV:
	- Timestamp column
	- Carbohydrates
	- Sugar
	- Calories
	"""
	_ACCEL_FORMAT_HELP = """
	Accelerometer Data CSV:
	- Timestamp column
	- Activity measurements
	"""

	# Sample datasets selectable in the data-source dropdown, keyed by their label
	_SAMPLE_DATASETS = {
	    "Sample A": 'data/processed/samples/sample_A.csv',
	    "Sample B": 'data/processed/samples/sample_B.csv',
	}


	def _read_uploaded_csv(uploaded_file, success_message, error_prefix):
	    """Parse an uploaded CSV into a DataFrame and report the outcome in the UI.

	    Args:
	        uploaded_file: Value returned by ``st.file_uploader`` (``None`` when
	            nothing has been uploaded yet).
	        success_message: Text shown with ``st.success`` when parsing works.
	        error_prefix: Text placed before the exception in ``st.error``.

	    Returns:
	        The parsed ``pandas.DataFrame``, or ``None`` when there is no file or
	        the file could not be read.
	    """
	    if uploaded_file is None:
	        return None
	    try:
	        frame = pd.read_csv(uploaded_file)
	    except Exception as e:
	        # Any parse failure is surfaced to the user instead of crashing the page
	        st.error(f"{error_prefix}: {e}")
	        return None
	    st.success(success_message)
	    return frame


	if data_option == "Upload files":
	    st.subheader("Upload Your Data Files")

	    glucose_file = st.file_uploader("Upload Glucose Levels CSV", type=["csv"], key="glucose")
	    food_file = st.file_uploader("Upload Food Logs CSV", type=["csv"], key="food")
	    accel_file = st.file_uploader("Upload Accelerometer Data CSV", type=["csv"], key="accel")

	    st.subheader("Patient Demographics")

	    # Gender selection
	    gender = st.selectbox("Select Patient Gender", options=["Female", "Male", "Other"], index=0)

	    # HbA1c input
	    hba1c = st.number_input("Enter HbA1c (%)", min_value=3.0, max_value=15.0, step=0.1)

	    all_files_uploaded = (glucose_file is not None) and (food_file is not None) and (accel_file is not None)

	    # Attempt to load whichever files have been provided so far
	    glucose_data = _read_uploaded_csv(
	        glucose_file, "Glucose data loaded successfully!", "Error loading glucose data"
	    )
	    food_data = _read_uploaded_csv(
	        food_file, "Food logs loaded successfully!", "Error loading food logs"
	    )
	    accel_data = _read_uploaded_csv(
	        accel_file, "Accelerometer data loaded successfully!", "Error loading accelerometer data"
	    )

	    # A file can be uploaded and still fail to parse; processing is only
	    # meaningful once all three DataFrames actually exist
	    all_files_loaded = all(
	        frame is not None for frame in (glucose_data, food_data, accel_data)
	    )

	    # Update the upload status in session state
	    st.session_state.files_uploaded = all_files_uploaded

	    # Show message if not all files are uploaded
	    if not all_files_uploaded:
	        st.warning("Please upload all three data files to enable data processing.")

	    col1, col2, col3 = st.columns([1,1,1])

	    with col2:
	        # The button stays disabled until every file has been parsed, so a
	        # click implies all three DataFrames are available
	        if st.button('Process Data', key='process_data_button', disabled=not all_files_loaded):
	            try:
	                # Call create_features with appropriate parameters
	                combined_data = create_features(
	                    bg_df=glucose_data,
	                    food_df=food_data,
	                    acc_df=accel_data,
	                    gender=gender,
	                    hba1c=hba1c,
	                    add_patient_id=True
	                )
	                st.session_state.combined_data = combined_data
	                st.session_state.data_processed = True
	                # Remember which source produced the stored data (see check below)
	                st.session_state["data_source"] = data_option
	                st.success("Data processed successfully!")
	                show_tabs = True
	            except Exception as e:
	                st.error(f"Error processing data: {e}")
	                st.session_state.data_processed = False
	                show_tabs = False

	    st.subheader("Expected File Formats:")

	    col1, col2, col3 = st.columns(3)

	    with col1:
	        st.markdown(_GLUCOSE_FORMAT_HELP)

	    with col2:
	        st.markdown(_FOOD_FORMAT_HELP)

	    with col3:
	        st.markdown(_ACCEL_FORMAT_HELP)

	    # Reuse data processed on an earlier rerun, but only if it came from an
	    # upload: without the source check, a previously loaded sample would be
	    # shown here as if it were the user's own data
	    if (
	        st.session_state.data_processed
	        and st.session_state.combined_data is not None
	        and st.session_state.get("data_source") == data_option
	    ):
	        combined_data = st.session_state.combined_data
	        show_tabs = True

	elif data_option in _SAMPLE_DATASETS:
	    # "Sample A" / "Sample B": read the matching CSV listed in _SAMPLE_DATASETS
	    combined_data_path = _SAMPLE_DATASETS[data_option]
	    combined_data = pd.read_csv(combined_data_path)
	    st.session_state.combined_data = combined_data
	    st.session_state.data_processed = True
	    st.session_state["data_source"] = data_option
	    st.success(f"{data_option} loaded successfully!")
	    show_tabs = True

	def _forecast_zeroshot(data, **eval_kwargs):
	    """Run ``zeroshot_eval`` on ``data`` and merge its per-step forecasts.

	    Args:
	        data: Combined feature DataFrame selected or processed above.
	        **eval_kwargs: Extra keyword arguments forwarded to ``zeroshot_eval``
	            (e.g. ``model_path``); with none given, its defaults apply.

	    Returns:
	        The result of ``simple_diagonal_averaging`` over every
	        ``Glucose_step_*`` column of the predictions.
	    """
	    column_specs = get_column_specs()
	    timestamp_column = column_specs["timestamp_column"]
	    prepared_data = prepare_data(data, timestamp_column)

	    train_file = 'data/processed/train_dataset.csv'
	    train_data = prepare_data(pd.read_csv(train_file), timestamp_column)

	    predictions = zeroshot_eval(
	        train_df=train_data,
	        test_df=prepared_data,
	        batch_size=8,
	        **eval_kwargs
	    )
	    predictions_df = predictions["predictions_df"]

	    # Get all step columns
	    step_columns = [col for col in predictions_df.columns if col.startswith("Glucose_step_")]

	    return simple_diagonal_averaging(predictions_df, prepared_data, CONTEXT_LENGTH, step_columns)


	def _forecast_xgboost(data):
	    """Predict with the pickled model at ``models/xgb_model.pkl`` and merge the steps.

	    Args:
	        data: Combined feature DataFrame selected or processed above.

	    Returns:
	        The result of ``simple_diagonal_averaging`` over all prediction columns.
	    """
	    # The targets returned by format_dataset are not needed for inference
	    X_test, _ = format_dataset(data, CONTEXT_LENGTH, PREDICTION_LENGTH)

	    model_output_path = "models/xgb_model.pkl"
	    xgb_model = joblib.load(model_output_path)

	    step_predictions = pd.DataFrame(xgb_model.predict(X_test))
	    return simple_diagonal_averaging(
	        step_predictions,
	        data,
	        CONTEXT_LENGTH,
	        step_predictions.columns
	    )


	def _plot_forecast(final_results, break_line_at_gaps=False):
	    """Draw predicted vs. actual glucose and render the figure in the app.

	    Args:
	        final_results: DataFrame with ``Timestamp``, ``Glucose`` and
	            ``averaged_prediction`` columns.
	        break_line_at_gaps: When true, zero predictions become NaN so the
	            predicted line is interrupted there; otherwise rows with a zero
	            prediction are dropped before plotting.
	    """
	    fig, ax = plt.subplots(figsize=(10, 6))
	    predicted = final_results['averaged_prediction']

	    if break_line_at_gaps:
	        # Ground truth first, then predictions with zeros replaced by NaN
	        # (which matplotlib skips when plotting)
	        ax.plot(final_results['Timestamp'], final_results['Glucose'],
	                label='Ground truth', alpha=0.7, color='#1f77b4')
	        ax.plot(final_results['Timestamp'], predicted.replace(0, float('nan')),
	                label='Predicted', alpha=0.7, color='#58A618')
	    else:
	        # Predictions (only non-zero rows) first, then all ground-truth values
	        filtered_results = final_results[predicted != 0]
	        ax.plot(filtered_results['Timestamp'], filtered_results['averaged_prediction'],
	                label='Predicted', alpha=0.7, color='#58A618')
	        ax.plot(final_results['Timestamp'], final_results['Glucose'],
	                label='Ground truth', alpha=0.7, color='#1f77b4')

	    ax.set_title('Glucose Predictions vs Actual Values')
	    ax.set_xlabel('Time')
	    ax.set_ylabel('Glucose Level')
	    ax.legend()

	    st.pyplot(fig)
	    # pyplot keeps every figure it created alive until closed; release it so
	    # figures do not pile up across reruns
	    plt.close(fig)


	def _show_rmse(final_results):
	    """Display the RMSE between actual and predicted glucose, or "N/A".

	    The first ``CONTEXT_LENGTH`` rows are skipped and rows whose prediction is
	    exactly zero are excluded from the metric.

	    Args:
	        final_results: DataFrame with ``Glucose`` and ``averaged_prediction``
	            columns.
	    """
	    y_true = final_results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True)
	    y_pred = final_results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True)

	    # Filter out zero predictions
	    non_zero_mask = y_pred != 0
	    y_true_filtered = y_true[non_zero_mask]
	    y_pred_filtered = y_pred[non_zero_mask]

	    st.subheader("Performance Metrics")
	    if len(y_pred_filtered) > 0:
	        # root_mean_squared_error already returns the RMSE; taking another
	        # square root would under-report the error
	        rmse = root_mean_squared_error(y_true_filtered, y_pred_filtered)
	        st.metric("AVERAGE RMSE", f"{rmse:.4f}")
	    else:
	        st.metric("AVERAGE RMSE", "N/A")


	def _render_prediction_tab(title, button_key, model_label, data, make_forecast,
	                           break_line_at_gaps=False):
	    """Render one model tab: heading, button and, on click, plot plus metric.

	    Args:
	        title: Sub-header shown at the top of the tab.
	        button_key: Unique Streamlit key for the tab's button.
	        model_label: Model name used in the error message.
	        data: Combined feature DataFrame, or ``None`` when unavailable.
	        make_forecast: Callable taking ``data`` and returning the DataFrame to
	            plot and score.
	        break_line_at_gaps: Forwarded to ``_plot_forecast``.
	    """
	    st.subheader(title)

	    if not st.button('Make prediction', key=button_key):
	        return

	    if data is None:
	        st.error("Data not available. Please try again.")
	        return

	    try:
	        final_results = make_forecast(data)
	        _plot_forecast(final_results, break_line_at_gaps=break_line_at_gaps)
	        _show_rmse(final_results)
	    except Exception as e:
	        # Report the failure inside the tab rather than crashing the whole page
	        st.error(f"Error in {model_label} prediction: {e}")


	# Add some spacing
	st.write("")
	st.write("")

	# Only show tabs if sample data is loaded or user data has been successfully processed
	if show_tabs:
	    tab1, tab2, tab3 = st.tabs(["Naive Model", "Machine Learning Model", "Deep Learning Model"])

	    with tab1:
	        _render_prediction_tab(
	            "Naive Model", 'naive_button', "naive model",
	            combined_data, _forecast_zeroshot
	        )

	    with tab2:
	        _render_prediction_tab(
	            "Machine Learning Model", 'ml_button', "machine learning model",
	            combined_data, _forecast_xgboost,
	            break_line_at_gaps=True
	        )

	    with tab3:
	        _render_prediction_tab(
	            "Deep Learning Model", 'dl_button', "deep learning model",
	            combined_data,
	            lambda data: _forecast_zeroshot(data, model_path="iaravagni/ttm-finetuned-model")
	        )
	else:
	    st.info("Upload and process data or select a sample dataset to view prediction models.")

	# Two empty writes act as vertical spacing before the closing sections
	for _ in range(2):
	    st.write("")

	# "About" section with the medical-use disclaimer
	_ABOUT_TEXT = """
	---
	### About this App

	This application is designed to help analyze and predict glucose levels based on glucose measurements,
	food logs, and physical activity data. The app merges these datasets based on timestamps to identify
	patterns and make predictions.

	Please note that this is a demonstration tool and should not be used for medical decisions without
	consultation with healthcare professionals.

	"""
	st.markdown(_ABOUT_TEXT)

	# Footer pinned to the bottom of the viewport, styled with the accent color
	_FOOTER_HTML = """
	<style>
	.footer {
	position: fixed;
	left: 0;
	bottom: 0;
	width: 100%;
	background-color: white;
	color: #58A618;
	text-align: center;
	padding: 10px;
	border-top: 2px solid #58A618;
	}
	</style>
	<div class="footer">
	<p>Glucose Prediction Application © 2025</p>
	</div>
	"""
	st.markdown(_FOOTER_HTML, unsafe_allow_html=True)