LovnishVerma committed on
Commit
ccb0135
·
verified ·
1 Parent(s): b1055f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +314 -67
app.py CHANGED
@@ -1,79 +1,326 @@
1
- import pandas as pd
 
2
  import numpy as np
3
  import os
4
- import matplotlib.pyplot as plt
5
- from sklearn.model_selection import train_test_split
6
  from sklearn.preprocessing import StandardScaler
7
- from sklearn.ensemble import RandomForestClassifier
8
- from sklearn.metrics import accuracy_score, classification_report
9
- from imblearn.over_sampling import SMOTE
10
- from joblib import dump
11
 
12
# Training script: fits a RandomForest diabetes classifier on "dia.csv" and
# saves the model and the fitted scaler under "models/".

# Load dataset
data = pd.read_csv("dia.csv")

# Features & target
X = data.drop(columns=["Outcome"])
y = data["Outcome"]

# In these columns a value of 0 is treated as "missing" (Pregnancies and
# Outcome are deliberately excluded), so mark it as NaN before imputing.
cols_with_zero = ["Glucose", "BloodPressure",
                  "SkinThickness", "Insulin", "BMI"]
X[cols_with_zero] = X[cols_with_zero].replace(0, np.nan)

# Split BEFORE imputing so the held-out rows cannot influence any statistic
# the model is trained with.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fill missing values with medians learned from the training split only;
# computing them on the full dataset would leak test-set information.
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# Scale: fit on the training split, apply the same transform to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Balance the training data with SMOTE (the test split is left untouched)
sm = SMOTE(random_state=42)
X_train_bal, y_train_bal = sm.fit_resample(X_train_scaled, y_train)

# Train Random Forest
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train_bal, y_train_bal)

# Evaluation at the default decision threshold (0.5)
y_pred = model.predict(X_test_scaled)
acc = accuracy_score(y_test, y_pred)
print(" Default Threshold Accuracy:", acc)
print("\nClassification Report (Threshold=0.5):\n",
      classification_report(y_test, y_pred))

# Threshold tuning: report metrics for several cut-offs on P(class 1)
print("\n Threshold Tuning Results")
y_proba = model.predict_proba(X_test_scaled)[:, 1]

for thresh in [0.3, 0.4, 0.5, 0.6]:
    y_pred_thresh = (y_proba >= thresh).astype(int)
    acc_thresh = accuracy_score(y_test, y_pred_thresh)
    print(f"\nThreshold = {thresh}")
    print("Accuracy:", acc_thresh)
    print(classification_report(y_test, y_pred_thresh, digits=3))

# Feature importance, plotted from most to least important
importances = model.feature_importances_
features = X.columns
sorted_idx = np.argsort(importances)[::-1]

plt.figure(figsize=(8, 5))
plt.bar(range(len(importances)), importances[sorted_idx], color="skyblue")
plt.xticks(range(len(importances)), features[sorted_idx], rotation=45)
plt.title("Feature Importance (RandomForest)")
plt.tight_layout()
plt.show()

# Save model & scaler
os.makedirs("models", exist_ok=True)
dump(model, "models/diabetes.sav")
dump(scaler, "models/scaler.sav")
print(" Final Model and Scaler saved in 'models/' folder.")
 
1
+ import streamlit as st
2
+ from joblib import load
3
  import numpy as np
4
  import os
 
 
5
  from sklearn.preprocessing import StandardScaler
 
 
 
 
6
 
7
# Module-level handles for the loaded artifacts. Streamlit re-executes this
# script on every interaction, so both names start each run as None and must
# be re-populated by load_model().
diabetes_model = None
scaler = None


@st.cache_resource
def _load_artifacts():
    """Read the model and the optional scaler from disk (cached by Streamlit).

    Returns:
        A ``(model, scaler)`` tuple; ``scaler`` is ``None`` when
        'models/scaler.sav' does not exist.

    Raises:
        FileNotFoundError: if 'models/diabetes.sav' is missing.
    """
    model = load('models/diabetes.sav')
    try:
        fitted_scaler = load('models/scaler.sav')
    except FileNotFoundError:
        # The scaler is optional; the caller falls back to raw inputs.
        fitted_scaler = None
    return model, fitted_scaler


def load_model():
    """Populate the ``diabetes_model`` and ``scaler`` globals.

    The cached value is the artifacts themselves, not a success flag: a cache
    hit skips the cached function's body, so assigning the globals inside it
    would leave them as None on every rerun after the first. Rebinding them
    here on each call keeps them valid for the current script run.

    Returns:
        True when the model is available, False when loading failed (an error
        message is shown in the app in that case).
    """
    global diabetes_model, scaler
    try:
        diabetes_model, scaler = _load_artifacts()
    except FileNotFoundError:
        st.error("❌ Model file not found at 'models/diabetes.sav'. Please ensure the model file exists.")
        return False
    except Exception as e:
        st.error(f"❌ Error loading model: {str(e)}")
        return False

    if scaler is None:
        st.warning("Scaler not found. Using raw input values.")
    return True
30
 
31
def validate_inputs(pregnancies, glucose, bloodpressure, skinthickness, insulin, bmi, diabetespedigree, age):
    """Check the clinical inputs for impossible or unusual values.

    Args:
        pregnancies: Number of pregnancies.
        glucose: Plasma glucose concentration (mg/dL).
        bloodpressure: Diastolic blood pressure (mmHg).
        skinthickness: Triceps skin fold thickness (mm).
        insulin: 2-hour serum insulin (mu U/ml); 0 is accepted silently.
        bmi: Body mass index.
        diabetespedigree: Diabetes pedigree function value.
        age: Age in years.

    Returns:
        A ``(errors, warnings)`` tuple of string lists. ``errors`` holds
        problems that should block a prediction; ``warnings`` holds values
        that are allowed but worth pointing out.
    """
    errors = []
    warnings = []

    # Critical validations (errors), each with softer range warnings
    if glucose <= 0:
        errors.append("Glucose level must be greater than 0")
    elif glucose < 70:
        warnings.append("Glucose level seems low (normal fasting: 70-100 mg/dL)")
    elif glucose > 200:
        warnings.append("Glucose level is very high - please consult a doctor immediately")

    if bloodpressure <= 0:
        errors.append("Blood pressure must be greater than 0")
    elif bloodpressure < 60:
        # The app collects diastolic pressure, so quote the diastolic range
        # shown next to the input field rather than a systolic one.
        warnings.append("Blood pressure seems low (normal diastolic: 60-80 mmHg)")
    elif bloodpressure > 180:
        warnings.append("Blood pressure is critically high - seek immediate medical attention")

    if bmi <= 0:
        errors.append("BMI must be greater than 0")
    elif bmi < 16:
        warnings.append("BMI indicates severe underweight")
    elif bmi > 40:
        warnings.append("BMI indicates severe obesity")

    if age <= 0:
        errors.append("Age must be greater than 0")

    # Logical validations across fields
    if pregnancies > 0 and age < 12:
        errors.append("Age seems too low for the number of pregnancies")

    # An insulin value of exactly 0 is not flagged; only small positive
    # readings and very large ones are.
    if 0 < insulin < 10:
        warnings.append("Insulin level seems unusually low")
    elif insulin > 300:
        warnings.append("Insulin level is very high")

    if skinthickness > 50:
        warnings.append("Skin thickness measurement seems unusually high")

    if diabetespedigree > 1.5:
        warnings.append("Diabetes pedigree function value is quite high")

    return errors, warnings
77
 
78
def preprocess_input(pregnancies, glucose, bloodpressure, skinthickness, insulin, bmi, diabetespedigree, age):
    """Pack the eight inputs into a single-row array and scale it if possible.

    The values are placed in the order of the parameter list. When the
    module-level ``scaler`` is set, its ``transform`` is applied; otherwise
    the raw values are returned.

    Returns:
        A 2-D array with one row, or ``None`` if anything went wrong (the
        failure is reported in the app).
    """
    # NOTE(review): the column order presumably matches the order the model
    # was trained with - confirm against the training data.
    try:
        row = np.array([[pregnancies, glucose, bloodpressure, skinthickness, insulin, bmi, diabetespedigree, age]])
        return row if scaler is None else scaler.transform(row)
    except Exception as exc:
        st.error(f"Error preprocessing input data: {str(exc)}")
        return None
92
 
93
def predict_diabetes_with_probability(pregnancies, glucose, bloodpressure, skinthickness, insulin, bmi, diabetespedigree, age):
    """Run the loaded model on one set of inputs.

    Returns:
        ``(prediction, probability)`` where ``prediction`` is a bool built
        from the model's first predicted label and ``probability`` is the
        second column of ``predict_proba`` as a percentage, or ``None`` when
        the model has no ``predict_proba``. Returns ``(None, None)`` when
        preprocessing or prediction fails; the failure is reported in the app.
    """
    try:
        features = preprocess_input(pregnancies, glucose, bloodpressure, skinthickness, insulin, bmi, diabetespedigree, age)
        if features is None:
            # preprocess_input has already reported the problem.
            return None, None

        label = diabetes_model.predict(features)

        try:
            # Column 1 of the first (only) row, expressed as a percentage.
            positive_pct = diabetes_model.predict_proba(features)[0][1] * 100
        except AttributeError:
            # Model doesn't support predict_proba
            positive_pct = None

        return bool(label[0]), positive_pct
    except Exception as exc:
        st.error(f"Error making prediction: {str(exc)}")
        return None, None
118
 
119
def display_logo():
    """Show 'static/logo.png' if it exists, otherwise a centred text heading.

    A failure while rendering an existing image is reported as a warning
    rather than raised.
    """
    logo_path = "static/logo.png"

    if not os.path.exists(logo_path):
        # No image on disk: display a simple text logo instead.
        st.markdown("""
        <div style='text-align: center; padding: 20px;'>
        <h2 style='color: #1f77b4;'>🩺 DIABETES PREDICTION</h2>
        </div>
        """, unsafe_allow_html=True)
        return

    try:
        st.image(logo_path, use_column_width=True)
    except Exception as exc:
        st.warning(f"Could not load logo: {str(exc)}")
134
 
135
def display_results(name, prediction, probability, errors, warnings):
    """Render the assessment outcome for one patient.

    Args:
        name: Patient name used in the greeting.
        prediction: True for a positive assessment, False for negative, or
            None when no prediction could be made.
        probability: Positive-class probability as a percentage, or None when
            the model does not provide one.
        errors: Accepted for interface compatibility; not used here.
        warnings: Non-blocking validation messages to list before the result.
    """
    if prediction is None:
        st.error("❌ Could not make prediction due to errors.")
        return

    # Display warnings if any
    if warnings:
        st.warning("⚠️ **Please note the following:**")
        for warning in warnings:
            st.warning(f"• {warning}")

    # Compare against None explicitly: 0.0 is a valid probability and must
    # still be shown, which a plain truthiness test would suppress.
    has_probability = probability is not None

    if prediction:
        st.error(f"🔴 **Hello {name}, your diabetes risk assessment is: POSITIVE**")
        if has_probability:
            st.error(f"**Risk Probability: {probability:.1f}%**")
        st.markdown("""
        **⚠️ IMPORTANT:** This is a preliminary assessment. Please consult with a healthcare professional
        for proper medical diagnosis and treatment.
        """)
    else:
        st.success(f"✅ **Hello {name}, your diabetes risk assessment is: NEGATIVE**")
        if has_probability:
            st.success(f"**Risk Probability: {probability:.1f}%**")
        st.markdown("""
        **ℹ️ NOTE:** This indicates lower risk based on the provided parameters. Continue maintaining
        a healthy lifestyle and regular medical check-ups.
        """)
164
 
165
# NOTE(review): several emoji in the user-facing strings below were restored
# from mis-encoded text. A few glyphs had lost a byte (ruler, memo, magnifier,
# hospital) and were chosen from context - confirm against the upstream file.


def _collect_clinical_inputs():
    """Render the sidebar number inputs and return them in feature order."""
    left, right = st.sidebar.columns(2)

    with left:
        pregnancies = st.number_input("🤱 Pregnancies", min_value=0, max_value=20, value=0,
                                      help="Number of times pregnant")
        glucose = st.number_input("🍯 Glucose (mg/dL)", min_value=0, max_value=300, value=120,
                                  help="Plasma glucose concentration (normal fasting: 70-100)")
        bloodpressure = st.number_input("💓 Blood Pressure (mmHg)", min_value=0, max_value=200, value=80,
                                        help="Diastolic blood pressure (normal: 60-80)")
        insulin = st.number_input("💉 Insulin (mu U/ml)", min_value=0, max_value=500, value=0,
                                  help="2-Hour serum insulin")

    with right:
        bmi = st.number_input("⚖️ BMI", min_value=0.0, max_value=50.0, value=25.0, format="%.1f",
                              help="Body mass index (normal: 18.5-24.9)")
        diabetespedigree = st.number_input("🧬 Diabetes Pedigree", min_value=0.0, max_value=2.0, value=0.5, format="%.3f",
                                           help="Diabetes pedigree function (family history)")
        age = st.number_input("📅 Age (years)", min_value=1, max_value=100, value=30,
                              help="Age in years")
        skinthickness = st.number_input("📏 Skin Thickness (mm)", min_value=0, max_value=100, value=20,
                                        help="Triceps skin fold thickness")

    return pregnancies, glucose, bloodpressure, skinthickness, insulin, bmi, diabetespedigree, age


def _show_input_summary(pregnancies, glucose, bloodpressure, skinthickness, insulin, bmi, diabetespedigree, age):
    """Echo the submitted values inside a collapsible two-column summary."""
    with st.expander("📋 Input Summary"):
        left, right = st.columns(2)
        with left:
            st.write(f"**Pregnancies:** {pregnancies}")
            st.write(f"**Glucose:** {glucose} mg/dL")
            st.write(f"**Blood Pressure:** {bloodpressure} mmHg")
            st.write(f"**Insulin:** {insulin} mu U/ml")
        with right:
            st.write(f"**BMI:** {bmi:.1f}")
            st.write(f"**Diabetes Pedigree:** {diabetespedigree:.3f}")
            st.write(f"**Age:** {age} years")
            st.write(f"**Skin Thickness:** {skinthickness} mm")


def _render_sidebar_footer():
    """Render the disclaimer and copyright notice at the bottom of the sidebar."""
    st.sidebar.markdown("---")
    st.sidebar.markdown("""
    **⚠️ Medical Disclaimer:**
    This tool is for educational purposes only.
    Always consult healthcare professionals for medical advice.

    **📞 Emergency:** If you have severe symptoms, seek immediate medical attention.

    © 2024 Diabetes Prediction App
    """)


def _render_info_tabs():
    """Render the educational tabs shown below the assessment area."""
    st.markdown("---")
    st.subheader("📚 About Diabetes Risk Factors")

    tab1, tab2, tab3 = st.tabs(["🔍 Risk Factors", "📊 Understanding Results", "🏥 Next Steps"])

    with tab1:
        st.markdown("""
        **Key Risk Factors for Diabetes:**
        - **Age:** Risk increases with age, especially after 45
        - **BMI:** Higher BMI indicates increased risk
        - **Family History:** Genetic predisposition (pedigree function)
        - **Blood Glucose:** Elevated glucose levels
        - **Blood Pressure:** Hypertension is linked to diabetes risk
        - **Previous Pregnancies:** Gestational diabetes history
        """)

    with tab2:
        st.markdown("""
        **Understanding Your Results:**
        - **NEGATIVE:** Lower risk based on current parameters
        - **POSITIVE:** Higher risk - requires medical evaluation
        - **Probability Score:** When available, shows percentage risk

        **Important:** This is a screening tool, not a diagnosis.
        """)

    with tab3:
        st.markdown("""
        **Recommended Next Steps:**

        **If Result is POSITIVE:**
        - 🏥 Schedule appointment with healthcare provider
        - 🩸 Request proper diagnostic tests (HbA1c, OGTT)
        - 📝 Discuss family history and symptoms

        **If Result is NEGATIVE:**
        - 🔄 Continue regular health check-ups
        - 🥗 Maintain healthy diet and exercise
        - 📅 Rescreen annually if risk factors present
        """)


def main():
    """Build the Streamlit page: inputs in the sidebar, results in the body.

    The page stops early when the model cannot be loaded, and renders only
    the introduction until a patient name has been entered. When validation
    errors are reported the function returns right after listing them, so the
    sidebar footer and the information tabs are not drawn on that run.
    """
    # Set page config
    st.set_page_config(
        page_title="Diabetes Prediction App",
        page_icon="💉",
        layout="centered",
        initial_sidebar_state="expanded",
    )

    # Load model
    if not load_model():
        st.stop()

    # Header
    st.title("🩺 Diabetes Risk Assessment Tool")
    display_logo()

    st.markdown("""
    This tool uses machine learning to assess diabetes risk based on clinical parameters.
    **Please note:** This is for educational purposes only and should not replace professional medical advice.
    """)

    # Input form
    st.sidebar.header("📋 Patient Information")

    # User details
    name = st.sidebar.text_input("👤 Full Name", placeholder="Enter patient name", help="Enter the patient's full name")

    if not name:
        st.info("👈 Please enter patient details in the sidebar to begin assessment.")
        return

    # Clinical details
    st.sidebar.subheader("🔬 Clinical Parameters")
    clinical_values = _collect_clinical_inputs()

    # Add reference ranges
    st.sidebar.markdown("""
    **📊 Reference Ranges:**
    - Glucose (fasting): 70-100 mg/dL
    - Blood Pressure: 60-80 mmHg (diastolic)
    - BMI: 18.5-24.9 (normal)
    - Age: Consider diabetes risk increases with age
    """)

    # Prediction button
    if st.sidebar.button("🔍 Assess Diabetes Risk", key="predict_button"):
        errors, warnings = validate_inputs(*clinical_values)

        if errors:
            st.error("❌ **Input Validation Errors:**")
            for error in errors:
                st.error(f"• {error}")
            st.info("Please correct the errors above and try again.")
            return

        with st.spinner("🔄 Analyzing patient data..."):
            prediction, probability = predict_diabetes_with_probability(*clinical_values)

        display_results(name, prediction, probability, errors, warnings)
        _show_input_summary(*clinical_values)

    # Footer
    _render_sidebar_footer()

    # Additional information in main area. A name is guaranteed at this point
    # because of the early return above, so no extra guard is needed.
    _render_info_tabs()


# Run the Streamlit app
if __name__ == "__main__":
    main()