Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,79 +1,326 @@
|
|
1 |
-
import
|
|
|
2 |
import numpy as np
|
3 |
import os
|
4 |
-
import matplotlib.pyplot as plt
|
5 |
-
from sklearn.model_selection import train_test_split
|
6 |
from sklearn.preprocessing import StandardScaler
|
7 |
-
from sklearn.ensemble import RandomForestClassifier
|
8 |
-
from sklearn.metrics import accuracy_score, classification_report
|
9 |
-
from imblearn.over_sampling import SMOTE
|
10 |
-
from joblib import dump
|
11 |
|
12 |
-
#
|
13 |
-
|
|
|
14 |
|
15 |
-
#
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
#
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
#
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
-
#
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
-
#
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
-
#
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
-
#
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
-
#
|
46 |
-
|
47 |
-
|
48 |
-
print(" Default Threshold Accuracy:", acc)
|
49 |
-
print("\nClassification Report (Threshold=0.5):\n",
|
50 |
-
classification_report(y_test, y_pred))
|
51 |
-
|
52 |
-
# Threshold Tuning
|
53 |
-
print("\n Threshold Tuning Results")
|
54 |
-
y_proba = model.predict_proba(X_test_scaled)[:, 1]
|
55 |
-
|
56 |
-
for thresh in [0.3, 0.4, 0.5, 0.6]:
|
57 |
-
y_pred_thresh = (y_proba >= thresh).astype(int)
|
58 |
-
acc_thresh = accuracy_score(y_test, y_pred_thresh)
|
59 |
-
print(f"\nThreshold = {thresh}")
|
60 |
-
print("Accuracy:", acc_thresh)
|
61 |
-
print(classification_report(y_test, y_pred_thresh, digits=3))
|
62 |
-
|
63 |
-
# Feature Importance
|
64 |
-
importances = model.feature_importances_
|
65 |
-
features = X.columns
|
66 |
-
sorted_idx = np.argsort(importances)[::-1]
|
67 |
-
|
68 |
-
plt.figure(figsize=(8, 5))
|
69 |
-
plt.bar(range(len(importances)), importances[sorted_idx], color="skyblue")
|
70 |
-
plt.xticks(range(len(importances)), features[sorted_idx], rotation=45)
|
71 |
-
plt.title("Feature Importance (RandomForest)")
|
72 |
-
plt.tight_layout()
|
73 |
-
plt.show()
|
74 |
-
|
75 |
-
# Save model & scaler
|
76 |
-
os.makedirs("models", exist_ok=True)
|
77 |
-
dump(model, "models/diabetes.sav")
|
78 |
-
dump(scaler, "models/scaler.sav")
|
79 |
-
print(" Final Model and Scaler saved in 'models/' folder.")
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from joblib import load
|
3 |
import numpy as np
|
4 |
import os
|
|
|
|
|
5 |
from sklearn.preprocessing import StandardScaler
|
|
|
|
|
|
|
|
|
6 |
|
7 |
+
# Global variable for model and scaler
|
8 |
+
diabetes_model = None
|
9 |
+
scaler = None
|
10 |
|
11 |
+
# Load the diabetes prediction model with error handling
|
12 |
+
@st.cache_resource
def _load_artifacts():
    """Read the model and the optional scaler from disk once per server process.

    Returns:
        A ``(model, scaler)`` tuple; ``scaler`` is ``None`` when
        'models/scaler.sav' does not exist.

    Raises:
        FileNotFoundError: if 'models/diabetes.sav' is missing.
        Exception: anything else joblib raises while unpickling.
    """
    # NOTE: joblib files are pickles - only load artifacts from a trusted source.
    model = load('models/diabetes.sav')
    try:
        fitted_scaler = load('models/scaler.sav')
    except FileNotFoundError:
        fitted_scaler = None
    return model, fitted_scaler


def load_model():
    """Populate the module-level ``diabetes_model`` and ``scaler`` globals.

    Streamlit re-executes the script on every rerun, which resets the module
    globals to ``None``. The expensive disk read is therefore cached in
    ``_load_artifacts`` while the globals are re-bound on *every* call;
    caching this function itself would skip the assignments on a cache hit
    and leave ``diabetes_model`` unset.

    Returns:
        True when the model is available, False when loading failed (an
        error message has already been shown to the user in that case).
    """
    global diabetes_model, scaler
    try:
        diabetes_model, scaler = _load_artifacts()
    except FileNotFoundError:
        st.error("❌ Model file not found at 'models/diabetes.sav'. Please ensure the model file exists.")
        return False
    except Exception as e:
        st.error(f"❌ Error loading model: {str(e)}")
        return False

    if scaler is None:
        # A missing scaler is tolerated: inputs are passed to the model unscaled.
        st.warning("Scaler not found. Using raw input values.")
    return True
|
30 |
|
31 |
+
# Input validation functions
|
32 |
+
def validate_inputs(pregnancies, glucose, bloodpressure, skinthickness, insulin, bmi, diabetespedigree, age):
    """Check the clinical inputs and collect blocking errors and soft warnings.

    Args:
        pregnancies: Number of pregnancies.
        glucose: Plasma glucose concentration (mg/dL).
        bloodpressure: Diastolic blood pressure (mmHg).
        skinthickness: Triceps skin fold thickness (mm).
        insulin: 2-hour serum insulin (mu U/ml); 0 is accepted silently.
        bmi: Body mass index.
        diabetespedigree: Diabetes pedigree function value.
        age: Age in years.

    Returns:
        A tuple ``(errors, warnings)`` of two lists of messages. A non-empty
        ``errors`` list means the inputs must not be sent to the model;
        ``warnings`` are informational only.
    """
    errors = []
    warnings = []

    # Critical validations (errors) with out-of-range hints (warnings)
    if glucose <= 0:
        errors.append("Glucose level must be greater than 0")
    elif glucose < 70:
        warnings.append("Glucose level seems low (normal fasting: 70-100 mg/dL)")
    elif glucose > 200:
        warnings.append("Glucose level is very high - please consult a doctor immediately")

    if bloodpressure <= 0:
        errors.append("Blood pressure must be greater than 0")
    elif bloodpressure < 60:
        # The value is diastolic pressure, so quote the diastolic reference
        # range that matches the 60 mmHg threshold used here.
        warnings.append("Blood pressure seems low (normal diastolic: 60-80 mmHg)")
    elif bloodpressure > 180:
        warnings.append("Blood pressure is critically high - seek immediate medical attention")

    if bmi <= 0:
        errors.append("BMI must be greater than 0")
    elif bmi < 16:
        warnings.append("BMI indicates severe underweight")
    elif bmi > 40:
        warnings.append("BMI indicates severe obesity")

    if age <= 0:
        errors.append("Age must be greater than 0")

    # Logical validations
    if pregnancies > 0 and age < 12:
        errors.append("Age seems too low for the number of pregnancies")

    # Insulin of exactly 0 produces no message.
    if 0 < insulin < 10:
        warnings.append("Insulin level seems unusually low")
    elif insulin > 300:
        warnings.append("Insulin level is very high")

    if skinthickness > 50:
        warnings.append("Skin thickness measurement seems unusually high")

    if diabetespedigree > 1.5:
        warnings.append("Diabetes pedigree function value is quite high")

    return errors, warnings
|
77 |
|
78 |
+
# Function to preprocess input data
|
79 |
+
def preprocess_input(pregnancies, glucose, bloodpressure, skinthickness, insulin, bmi, diabetespedigree, age):
    """Pack the eight inputs into a single-row 2-D array, scaled when possible.

    The module-level ``scaler`` is applied when it is set; otherwise the raw
    values are returned unchanged.

    Returns:
        A NumPy array holding one row of eight features, or ``None`` when
        building or scaling the row failed (the error is shown in the UI).
    """
    try:
        # NOTE(review): this order presumably matches the column order the
        # model and scaler were trained on - confirm against the training code.
        row = [pregnancies, glucose, bloodpressure, skinthickness, insulin, bmi, diabetespedigree, age]
        features = np.array([row])

        if scaler is None:
            return features
        return scaler.transform(features)
    except Exception as e:
        st.error(f"Error preprocessing input data: {str(e)}")
        return None
|
92 |
|
93 |
+
# Function to predict diabetes with probability
|
94 |
+
def predict_diabetes_with_probability(pregnancies, glucose, bloodpressure, skinthickness, insulin, bmi, diabetespedigree, age):
    """Run the loaded model on one set of inputs.

    Returns:
        ``(prediction, probability)`` where ``prediction`` is a bool taken
        from the model's first predicted label and ``probability`` is the
        second ``predict_proba`` column for that row, expressed as a
        percentage. ``probability`` is ``None`` when the model has no
        ``predict_proba``. Both values are ``None`` when preprocessing or
        prediction failed (the error is shown in the UI).
    """
    try:
        features = preprocess_input(
            pregnancies, glucose, bloodpressure, skinthickness,
            insulin, bmi, diabetespedigree, age,
        )
        if features is None:
            return None, None

        labels = diabetes_model.predict(features)

        try:
            class_probs = diabetes_model.predict_proba(features)
            positive_pct = class_probs[0][1] * 100  # Probability of positive class
        except AttributeError:
            # Model doesn't support predict_proba
            positive_pct = None

        return bool(labels[0]), positive_pct

    except Exception as e:
        st.error(f"Error making prediction: {str(e)}")
        return None, None
|
118 |
|
119 |
+
# Function to display logo with error handling
|
120 |
+
def display_logo():
    """Show 'static/logo.png' if present, otherwise a centered text heading.

    A failure while rendering the image is reported as a warning instead of
    aborting the page.
    """
    logo_path = "static/logo.png"
    if os.path.exists(logo_path):
        try:
            st.image(logo_path, use_column_width=True)
        except Exception as e:
            st.warning(f"Could not load logo: {str(e)}")
    else:
        # Display a simple text logo instead. The stethoscope emoji is
        # restored from its mis-decoded form.
        st.markdown("""
        <div style='text-align: center; padding: 20px;'>
            <h2 style='color: #1f77b4;'>🩺 DIABETES PREDICTION</h2>
        </div>
        """, unsafe_allow_html=True)
|
134 |
|
135 |
+
# Function to display results with enhanced formatting
|
136 |
+
def display_results(name, prediction, probability, errors, warnings):
    """Render the assessment outcome for one patient.

    Args:
        name: Patient name used in the greeting.
        prediction: True for a positive assessment, False for negative,
            None when no prediction could be made.
        probability: Positive-class probability as a percentage, or None
            when the model does not provide one.
        errors: Validation errors; not used here, kept so existing callers
            keep working.
        warnings: Validation warnings, listed above the result.
    """
    if prediction is None:
        st.error("❌ Could not make prediction due to errors.")
        return

    # Display warnings if any
    if warnings:
        st.warning("⚠️ **Please note the following:**")
        for warning in warnings:
            st.warning(f"• {warning}")

    # Compare against None so that a legitimate 0.0% is still displayed.
    has_probability = probability is not None

    # Display result
    if prediction:
        st.error(f"🔴 **Hello {name}, your diabetes risk assessment is: POSITIVE**")
        if has_probability:
            st.error(f"**Risk Probability: {probability:.1f}%**")
        st.markdown("""
        **⚠️ IMPORTANT:** This is a preliminary assessment. Please consult with a healthcare professional
        for proper medical diagnosis and treatment.
        """)
    else:
        st.success(f"✅ **Hello {name}, your diabetes risk assessment is: NEGATIVE**")
        if has_probability:
            st.success(f"**Risk Probability: {probability:.1f}%**")
        st.markdown("""
        **ℹ️ NOTE:** This indicates lower risk based on the provided parameters. Continue maintaining
        a healthy lifestyle and regular medical check-ups.
        """)
|
164 |
|
165 |
+
# Streamlit app
|
166 |
+
def main():
|
167 |
+
# Set page config
|
168 |
+
st.set_page_config(
|
169 |
+
page_title="Diabetes Prediction App",
|
170 |
+
page_icon="π",
|
171 |
+
layout="centered",
|
172 |
+
initial_sidebar_state="expanded",
|
173 |
+
)
|
174 |
+
|
175 |
+
# Load model
|
176 |
+
if not load_model():
|
177 |
+
st.stop()
|
178 |
+
|
179 |
+
# Header
|
180 |
+
st.title("π©Ί Diabetes Risk Assessment Tool")
|
181 |
+
display_logo()
|
182 |
+
|
183 |
+
st.markdown("""
|
184 |
+
This tool uses machine learning to assess diabetes risk based on clinical parameters.
|
185 |
+
**Please note:** This is for educational purposes only and should not replace professional medical advice.
|
186 |
+
""")
|
187 |
+
|
188 |
+
# Input form
|
189 |
+
st.sidebar.header("π Patient Information")
|
190 |
+
|
191 |
+
# User details
|
192 |
+
name = st.sidebar.text_input("π€ Full Name", placeholder="Enter patient name", help="Enter the patient's full name")
|
193 |
+
|
194 |
+
if not name:
|
195 |
+
st.info("π Please enter patient details in the sidebar to begin assessment.")
|
196 |
+
return
|
197 |
+
|
198 |
+
# Clinical details
|
199 |
+
st.sidebar.subheader("π¬ Clinical Parameters")
|
200 |
+
|
201 |
+
# Create two columns for better organization
|
202 |
+
col1, col2 = st.sidebar.columns(2)
|
203 |
+
|
204 |
+
with col1:
|
205 |
+
pregnancies = st.number_input("π€± Pregnancies", min_value=0, max_value=20, value=0,
|
206 |
+
help="Number of times pregnant")
|
207 |
+
glucose = st.number_input("π― Glucose (mg/dL)", min_value=0, max_value=300, value=120,
|
208 |
+
help="Plasma glucose concentration (normal fasting: 70-100)")
|
209 |
+
bloodpressure = st.number_input("π Blood Pressure (mmHg)", min_value=0, max_value=200, value=80,
|
210 |
+
help="Diastolic blood pressure (normal: 60-80)")
|
211 |
+
insulin = st.number_input("π Insulin (mu U/ml)", min_value=0, max_value=500, value=0,
|
212 |
+
help="2-Hour serum insulin")
|
213 |
+
|
214 |
+
with col2:
|
215 |
+
bmi = st.number_input("βοΈ BMI", min_value=0.0, max_value=50.0, value=25.0, format="%.1f",
|
216 |
+
help="Body mass index (normal: 18.5-24.9)")
|
217 |
+
diabetespedigree = st.number_input("𧬠Diabetes Pedigree", min_value=0.0, max_value=2.0, value=0.5, format="%.3f",
|
218 |
+
help="Diabetes pedigree function (family history)")
|
219 |
+
age = st.number_input("π
Age (years)", min_value=1, max_value=100, value=30,
|
220 |
+
help="Age in years")
|
221 |
+
skinthickness = st.number_input("π Skin Thickness (mm)", min_value=0, max_value=100, value=20,
|
222 |
+
help="Triceps skin fold thickness")
|
223 |
+
|
224 |
+
# Add reference ranges
|
225 |
+
st.sidebar.markdown("""
|
226 |
+
**π Reference Ranges:**
|
227 |
+
- Glucose (fasting): 70-100 mg/dL
|
228 |
+
- Blood Pressure: 60-80 mmHg (diastolic)
|
229 |
+
- BMI: 18.5-24.9 (normal)
|
230 |
+
- Age: Consider diabetes risk increases with age
|
231 |
+
""")
|
232 |
+
|
233 |
+
# Prediction button
|
234 |
+
if st.sidebar.button("π Assess Diabetes Risk", key="predict_button"):
|
235 |
+
# Validate inputs
|
236 |
+
errors, warnings = validate_inputs(pregnancies, glucose, bloodpressure, skinthickness,
|
237 |
+
insulin, bmi, diabetespedigree, age)
|
238 |
+
|
239 |
+
if errors:
|
240 |
+
st.error("β **Input Validation Errors:**")
|
241 |
+
for error in errors:
|
242 |
+
st.error(f"β’ {error}")
|
243 |
+
st.info("Please correct the errors above and try again.")
|
244 |
+
return
|
245 |
+
|
246 |
+
# Make prediction
|
247 |
+
with st.spinner("π Analyzing patient data..."):
|
248 |
+
prediction, probability = predict_diabetes_with_probability(
|
249 |
+
pregnancies, glucose, bloodpressure, skinthickness, insulin, bmi, diabetespedigree, age
|
250 |
+
)
|
251 |
+
|
252 |
+
# Display results
|
253 |
+
display_results(name, prediction, probability, errors, warnings)
|
254 |
+
|
255 |
+
# Display input summary
|
256 |
+
with st.expander("π Input Summary"):
|
257 |
+
col1, col2 = st.columns(2)
|
258 |
+
with col1:
|
259 |
+
st.write(f"**Pregnancies:** {pregnancies}")
|
260 |
+
st.write(f"**Glucose:** {glucose} mg/dL")
|
261 |
+
st.write(f"**Blood Pressure:** {bloodpressure} mmHg")
|
262 |
+
st.write(f"**Insulin:** {insulin} mu U/ml")
|
263 |
+
with col2:
|
264 |
+
st.write(f"**BMI:** {bmi:.1f}")
|
265 |
+
st.write(f"**Diabetes Pedigree:** {diabetespedigree:.3f}")
|
266 |
+
st.write(f"**Age:** {age} years")
|
267 |
+
st.write(f"**Skin Thickness:** {skinthickness} mm")
|
268 |
+
|
269 |
+
# Footer
|
270 |
+
st.sidebar.markdown("---")
|
271 |
+
st.sidebar.markdown("""
|
272 |
+
**β οΈ Medical Disclaimer:**
|
273 |
+
This tool is for educational purposes only.
|
274 |
+
Always consult healthcare professionals for medical advice.
|
275 |
+
|
276 |
+
**π Emergency:** If you have severe symptoms, seek immediate medical attention.
|
277 |
+
|
278 |
+
Β© 2024 Diabetes Prediction App
|
279 |
+
""")
|
280 |
+
|
281 |
+
# Additional information in main area
|
282 |
+
if name:
|
283 |
+
st.markdown("---")
|
284 |
+
st.subheader("π About Diabetes Risk Factors")
|
285 |
+
|
286 |
+
tab1, tab2, tab3 = st.tabs(["π Risk Factors", "π Understanding Results", "π₯ Next Steps"])
|
287 |
+
|
288 |
+
with tab1:
|
289 |
+
st.markdown("""
|
290 |
+
**Key Risk Factors for Diabetes:**
|
291 |
+
- **Age:** Risk increases with age, especially after 45
|
292 |
+
- **BMI:** Higher BMI indicates increased risk
|
293 |
+
- **Family History:** Genetic predisposition (pedigree function)
|
294 |
+
- **Blood Glucose:** Elevated glucose levels
|
295 |
+
- **Blood Pressure:** Hypertension is linked to diabetes risk
|
296 |
+
- **Previous Pregnancies:** Gestational diabetes history
|
297 |
+
""")
|
298 |
+
|
299 |
+
with tab2:
|
300 |
+
st.markdown("""
|
301 |
+
**Understanding Your Results:**
|
302 |
+
- **NEGATIVE:** Lower risk based on current parameters
|
303 |
+
- **POSITIVE:** Higher risk - requires medical evaluation
|
304 |
+
- **Probability Score:** When available, shows percentage risk
|
305 |
+
|
306 |
+
**Important:** This is a screening tool, not a diagnosis.
|
307 |
+
""")
|
308 |
+
|
309 |
+
with tab3:
|
310 |
+
st.markdown("""
|
311 |
+
**Recommended Next Steps:**
|
312 |
+
|
313 |
+
**If Result is POSITIVE:**
|
314 |
+
- π₯ Schedule appointment with healthcare provider
|
315 |
+
- π©Έ Request proper diagnostic tests (HbA1c, OGTT)
|
316 |
+
- π Discuss family history and symptoms
|
317 |
+
|
318 |
+
**If Result is NEGATIVE:**
|
319 |
+
- π Continue regular health check-ups
|
320 |
+
- π₯ Maintain healthy diet and exercise
|
321 |
+
- π
Rescreen annually if risk factors present
|
322 |
+
""")
|
323 |
|
324 |
+
# Run the Streamlit app
|
325 |
+
if __name__ == "__main__":
|
326 |
+
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|