LovnishVerma committed on
Commit
b1055f3
·
verified ·
1 Parent(s): a51279b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -91
app.py CHANGED
@@ -1,91 +1,79 @@
1
- import pandas as pd
2
- import numpy as np
3
- import os
4
- import matplotlib.pyplot as plt
5
- from sklearn.model_selection import train_test_split
6
- from sklearn.preprocessing import StandardScaler
7
- from sklearn.ensemble import RandomForestClassifier
8
- from sklearn.metrics import accuracy_score, classification_report
9
- from imblearn.over_sampling import SMOTE
10
- from joblib import dump
11
-
12
- # ==========================
13
- # Load dataset
14
- # ==========================
15
- data = pd.read_csv("dia.csv")
16
-
17
- # Features & Target
18
- X = data.drop(columns=["Outcome"])
19
- y = data["Outcome"]
20
-
21
- # Replace zero values in certain columns (except Pregnancies & Outcome)
22
- cols_with_zero = ["Glucose", "BloodPressure",
23
- "SkinThickness", "Insulin", "BMI"]
24
- X[cols_with_zero] = X[cols_with_zero].replace(0, np.nan)
25
-
26
- # Fill missing with median
27
- X = X.fillna(X.median())
28
-
29
- # Train-Test Split
30
- X_train, X_test, y_train, y_test = train_test_split(
31
- X, y, test_size=0.2, random_state=42, stratify=y
32
- )
33
-
34
- # Scale
35
- scaler = StandardScaler()
36
- X_train_scaled = scaler.fit_transform(X_train)
37
- X_test_scaled = scaler.transform(X_test)
38
-
39
- # Balance dataset with SMOTE
40
- sm = SMOTE(random_state=42)
41
- X_train_bal, y_train_bal = sm.fit_resample(X_train_scaled, y_train)
42
-
43
- # ==========================
44
- # Train Random Forest
45
- # ==========================
46
- model = RandomForestClassifier(n_estimators=200, random_state=42)
47
- model.fit(X_train_bal, y_train_bal)
48
-
49
- # ==========================
50
- # Evaluation @ Default Threshold 0.5
51
- # ==========================
52
- y_pred = model.predict(X_test_scaled)
53
- acc = accuracy_score(y_test, y_pred)
54
- print("✅ Default Threshold Accuracy:", acc)
55
- print("\nClassification Report (Threshold=0.5):\n",
56
- classification_report(y_test, y_pred))
57
-
58
- # ==========================
59
- # Threshold Tuning
60
- # ==========================
61
- print("\n🔎 Threshold Tuning Results")
62
- y_proba = model.predict_proba(X_test_scaled)[:, 1]
63
-
64
- for thresh in [0.3, 0.4, 0.5, 0.6]:
65
- y_pred_thresh = (y_proba >= thresh).astype(int)
66
- acc_thresh = accuracy_score(y_test, y_pred_thresh)
67
- print(f"\nThreshold = {thresh}")
68
- print("Accuracy:", acc_thresh)
69
- print(classification_report(y_test, y_pred_thresh, digits=3))
70
-
71
- # ==========================
72
- # Feature Importance
73
- # ==========================
74
- importances = model.feature_importances_
75
- features = X.columns
76
- sorted_idx = np.argsort(importances)[::-1]
77
-
78
- plt.figure(figsize=(8, 5))
79
- plt.bar(range(len(importances)), importances[sorted_idx], color="skyblue")
80
- plt.xticks(range(len(importances)), features[sorted_idx], rotation=45)
81
- plt.title("Feature Importance (RandomForest)")
82
- plt.tight_layout()
83
- plt.show()
84
-
85
- # ==========================
86
- # Save model & scaler
87
- # ==========================
88
- os.makedirs("models", exist_ok=True)
89
- dump(model, "models/diabetes.sav")
90
- dump(scaler, "models/scaler.sav")
91
- print("✅ Final Model and Scaler saved in 'models/' folder.")
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import os
4
+ import matplotlib.pyplot as plt
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.preprocessing import StandardScaler
7
+ from sklearn.ensemble import RandomForestClassifier
8
+ from sklearn.metrics import accuracy_score, classification_report
9
+ from imblearn.over_sampling import SMOTE
10
+ from joblib import dump
11
+
12
+ # Load dataset
13
+ data = pd.read_csv("dia.csv")
14
+
15
+ # Features & Target
16
+ X = data.drop(columns=["Outcome"])
17
+ y = data["Outcome"]
18
+
19
+ # Replace zero values in certain columns (except Pregnancies & Outcome)
20
+ cols_with_zero = ["Glucose", "BloodPressure",
21
+ "SkinThickness", "Insulin", "BMI"]
22
+ X[cols_with_zero] = X[cols_with_zero].replace(0, np.nan)
23
+
24
+ # Fill missing with median
25
+ X = X.fillna(X.median())
26
+
27
+ # Train-Test Split
28
+ X_train, X_test, y_train, y_test = train_test_split(
29
+ X, y, test_size=0.2, random_state=42, stratify=y
30
+ )
31
+
32
+ # Scale
33
+ scaler = StandardScaler()
34
+ X_train_scaled = scaler.fit_transform(X_train)
35
+ X_test_scaled = scaler.transform(X_test)
36
+
37
+ # Balance dataset with SMOTE
38
+ sm = SMOTE(random_state=42)
39
+ X_train_bal, y_train_bal = sm.fit_resample(X_train_scaled, y_train)
40
+
41
+ # Train Random Forest
42
+ model = RandomForestClassifier(n_estimators=200, random_state=42)
43
+ model.fit(X_train_bal, y_train_bal)
44
+
45
+ # Evaluation @ Default Threshold 0.5
46
+ y_pred = model.predict(X_test_scaled)
47
+ acc = accuracy_score(y_test, y_pred)
48
+ print(" Default Threshold Accuracy:", acc)
49
+ print("\nClassification Report (Threshold=0.5):\n",
50
+ classification_report(y_test, y_pred))
51
+
52
+ # Threshold Tuning
53
+ print("\n Threshold Tuning Results")
54
+ y_proba = model.predict_proba(X_test_scaled)[:, 1]
55
+
56
+ for thresh in [0.3, 0.4, 0.5, 0.6]:
57
+ y_pred_thresh = (y_proba >= thresh).astype(int)
58
+ acc_thresh = accuracy_score(y_test, y_pred_thresh)
59
+ print(f"\nThreshold = {thresh}")
60
+ print("Accuracy:", acc_thresh)
61
+ print(classification_report(y_test, y_pred_thresh, digits=3))
62
+
63
+ # Feature Importance
64
+ importances = model.feature_importances_
65
+ features = X.columns
66
+ sorted_idx = np.argsort(importances)[::-1]
67
+
68
+ plt.figure(figsize=(8, 5))
69
+ plt.bar(range(len(importances)), importances[sorted_idx], color="skyblue")
70
+ plt.xticks(range(len(importances)), features[sorted_idx], rotation=45)
71
+ plt.title("Feature Importance (RandomForest)")
72
+ plt.tight_layout()
73
+ plt.show()
74
+
75
+ # Save model & scaler
76
+ os.makedirs("models", exist_ok=True)
77
+ dump(model, "models/diabetes.sav")
78
+ dump(scaler, "models/scaler.sav")
79
+ print(" Final Model and Scaler saved in 'models/' folder.")