OsamaMIT committed on
Commit
096b78b
·
verified ·
1 Parent(s): 9a13cf3

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +87 -87
model.py CHANGED
@@ -1,88 +1,88 @@
1
- import pandas as pd
2
- from sklearn.ensemble import HistGradientBoostingClassifier
3
- from sklearn.model_selection import train_test_split
4
- from sklearn.metrics import classification_report, confusion_matrix
5
- import joblib
6
- from lime.lime_tabular import LimeTabularExplainer
7
-
8
- # Load data
9
- data = pd.read_csv('src/card_transdata.csv')
10
-
11
- # Features and target
12
- X = data.drop(columns=['fraud'])
13
- y = data['fraud']
14
-
15
- # Train/test split
16
- X_train, X_test, y_train, y_test = train_test_split(
17
- X, y,
18
- test_size=0.3,
19
- stratify=y,
20
- random_state=42
21
- )
22
-
23
- # Initialize a gradient-boosting classifier with class imbalance handling
24
- model = HistGradientBoostingClassifier(
25
- loss="log_loss",
26
- class_weight="balanced",
27
- learning_rate=0.05,
28
- max_iter=200,
29
- max_depth=8,
30
- random_state=42
31
- )
32
-
33
- # Train on the training set
34
- model.fit(X_train, y_train)
35
-
36
- # Predict on the test set
37
- y_pred = model.predict(X_test)
38
-
39
- """Comment out the following lines to skip evaluation in prod"""
40
- # print("\nTesting with all transactions...")
41
-
42
- # # Evaluate
43
- # print("Classification Report:")
44
- # print(classification_report(y_test, y_pred, digits=4))
45
-
46
- # print("\nConfusion Matrix:")
47
- # print(confusion_matrix(y_test, y_pred))
48
-
49
-
50
- # # Save and load the model using joblib
51
- def save_model(model, filename='fraud_model.pkl'):
52
- """Saves the trained model to a file."""
53
- joblib.dump(model, filename)
54
- #print(f"Model saved to {filename}")
55
-
56
- save_model(model)
57
-
58
- def load_model(filename='fraud_model.pkl'):
59
- """Loads the saved model from a file."""
60
- model = joblib.load(filename)
61
- #print(f"Model loaded from {filename}")
62
- return model
63
-
64
-
65
- # Initialize LIME explainer on training data
66
- explainer = LimeTabularExplainer(
67
- training_data=X_train.values,
68
- feature_names=X_train.columns.tolist(),
69
- class_names=['not_fraud', 'fraud'],
70
- mode='classification'
71
- )
72
-
73
- def extract_top_features(single_row_df, top_n=3):
74
- # Generate explanation for the 'fraud' class (label=1)
75
- exp = explainer.explain_instance(
76
- single_row_df.values[0],
77
- lambda arr: model.predict_proba(
78
- pd.DataFrame(arr, columns=X_train.columns.tolist())
79
- ),
80
- num_features=top_n
81
- )
82
-
83
- # Get list of (feature, weight) for the fraud prediction
84
- feature_weights = exp.as_list(label=1)
85
- # Format the top features into a string
86
- formatted = "Transaction's top features:\n"
87
- formatted += "\n".join(f" - {feat}: weight {weight:.4f}" for feat, weight in feature_weights)
88
  return formatted
 
1
+ import pandas as pd
2
+ from sklearn.ensemble import HistGradientBoostingClassifier
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.metrics import classification_report, confusion_matrix
5
+ import joblib
6
+ from lime.lime_tabular import LimeTabularExplainer
7
+
8
# Load the transaction dataset. The path is relative to the current working
# directory, so the process must be started from the folder holding the CSV.
data = pd.read_csv('card_transdata.csv')

# Features are every column except the 'fraud' label; the label is the target.
X = data.drop(columns=['fraud'])
y = data['fraud']

# Hold out 30% of the rows for testing. Stratifying on the label keeps the
# class ratio the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Gradient-boosting classifier; class_weight="balanced" is the class-imbalance
# handling, and the fixed seed keeps training repeatable.
model = HistGradientBoostingClassifier(
    loss="log_loss",
    class_weight="balanced",
    learning_rate=0.05,
    max_iter=200,
    max_depth=8,
    random_state=42,
)

# Train on the training split only.
model.fit(X_train, y_train)

# Predictions on the held-out split; consumed by the optional evaluation below.
y_pred = model.predict(X_test)

# Evaluation is disabled for prod. Uncomment the lines below to print the
# classification report and confusion matrix for the held-out split.
# print("\nTesting with all transactions...")

# # Evaluate
# print("Classification Report:")
# print(classification_report(y_test, y_pred, digits=4))

# print("\nConfusion Matrix:")
# print(confusion_matrix(y_test, y_pred))
48
+
49
+
50
# Persistence helpers: the model is written to and read from disk with joblib.
def save_model(model, filename='fraud_model.pkl'):
    """Write *model* to *filename* using ``joblib.dump``.

    Args:
        model: The object to serialize (the trained classifier in this module).
        filename: Destination path; defaults to ``fraud_model.pkl`` relative
            to the current working directory.
    """
    joblib.dump(value=model, filename=filename)


# Persist the freshly trained model as soon as this module is executed.
save_model(model)
57
+
58
def load_model(filename='fraud_model.pkl'):
    """Return the object stored in *filename*, read back with ``joblib.load``.

    Args:
        filename: Path of the file to read; defaults to ``fraud_model.pkl``
            relative to the current working directory.

    Returns:
        Whatever object was serialized into the file.

    Note:
        joblib files are pickle-based, so loading one can execute arbitrary
        code. Only load files that come from a trusted source.
    """
    # Returned directly so no local name shadows the module-level ``model``.
    return joblib.load(filename)
63
+
64
+
65
# Build the LIME tabular explainer from the training split, labelling the
# features with the training column names and the two classes as
# 'not_fraud' / 'fraud'.
explainer = LimeTabularExplainer(
    X_train.to_numpy(),
    mode='classification',
    feature_names=X_train.columns.tolist(),
    class_names=['not_fraud', 'fraud'],
)
72
+
73
def extract_top_features(single_row_df, top_n=3):
    """Describe the features LIME weights most heavily for one transaction.

    Args:
        single_row_df: DataFrame whose first row is the transaction to explain.
            Its values are passed positionally, so they are presumably expected
            in the same column order as ``X_train`` (confirm against callers).
        top_n: Number of features requested from the LIME explanation.

    Returns:
        A string made of a header line followed by one
        `` - <feature>: weight <value>`` line per feature, with each weight
        formatted to four decimal places.
    """
    column_names = X_train.columns.tolist()

    def predict_with_column_names(samples):
        # Wrap the rows LIME supplies in a DataFrame carrying the training
        # column names before asking the model for class probabilities.
        return model.predict_proba(pd.DataFrame(samples, columns=column_names))

    explanation = explainer.explain_instance(
        single_row_df.values[0],
        predict_with_column_names,
        num_features=top_n,
    )

    # (feature, weight) pairs for label 1, the 'fraud' class.
    weighted_features = explanation.as_list(label=1)

    lines = [
        f" - {feature}: weight {weight:.4f}"
        for feature, weight in weighted_features
    ]
    return "Transaction's top features:\n" + "\n".join(lines)