Spaces:

Agents-MCP-Hackathon
/

credit-card-fraud-detection

Sleeping

App Files Files Community

OsamaMIT committed on Jun 10

Commit

096b78b

verified ·

1 Parent(s): 9a13cf3

Update model.py

Browse files

Files changed (1) hide show

model.py +87 -87

model.py CHANGED Viewed

@@ -1,88 +1,88 @@
-import pandas as pd
-from sklearn.ensemble import HistGradientBoostingClassifier
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import classification_report, confusion_matrix
-import joblib
-from lime.lime_tabular import LimeTabularExplainer
-# Load data
-data = pd.read_csv('src/card_transdata.csv')
-# Features and target
-X = data.drop(columns=['fraud'])
-y = data['fraud']
-# Train/test split
-X_train, X_test, y_train, y_test = train_test_split(
-    X, y,
-    test_size=0.3,
-    stratify=y,
-    random_state=42
-)
-# Initialize a gradient-boosting classifier with class imbalance handling
-model = HistGradientBoostingClassifier(
-    loss="log_loss",
-    class_weight="balanced",
-    learning_rate=0.05,
-    max_iter=200,
-    max_depth=8,
-    random_state=42
-)
-# Train on the training set
-model.fit(X_train, y_train)
-# Predict on the test set
-y_pred = model.predict(X_test)
-"""Comment out the following lines to skip evaluation in prod"""
-# print("\nTesting with all transactions...")
-# # Evaluate
-# print("Classification Report:")
-# print(classification_report(y_test, y_pred, digits=4))
-# print("\nConfusion Matrix:")
-# print(confusion_matrix(y_test, y_pred))
-# # Save and load the model using joblib
-def save_model(model, filename='fraud_model.pkl'):
-    """Saves the trained model to a file."""
-    joblib.dump(model, filename)
-    #print(f"Model saved to {filename}")
-save_model(model)
-def load_model(filename='fraud_model.pkl'):
-    """Loads the saved model from a file."""
-    model = joblib.load(filename)
-    #print(f"Model loaded from {filename}")
-    return model
-# Initialize LIME explainer on training data
-explainer = LimeTabularExplainer(
-    training_data=X_train.values,
-    feature_names=X_train.columns.tolist(),
-    class_names=['not_fraud', 'fraud'],
-    mode='classification'
-)
-def extract_top_features(single_row_df, top_n=3):
-    # Generate explanation for the 'fraud' class (label=1)
-    exp = explainer.explain_instance(
-        single_row_df.values[0],
-        lambda arr: model.predict_proba(
-            pd.DataFrame(arr, columns=X_train.columns.tolist())
-        ),
-        num_features=top_n
-    )
-    # Get list of (feature, weight) for the fraud prediction
-    feature_weights = exp.as_list(label=1)
-    # Format the top features into a string
-    formatted = "Transaction's top features:\n"
-    formatted += "\n".join(f"  - {feat}: weight {weight:.4f}" for feat, weight in feature_weights)
     return formatted

+import pandas as pd
+from sklearn.ensemble import HistGradientBoostingClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, confusion_matrix
+import joblib
+from lime.lime_tabular import LimeTabularExplainer
+# Load data
+data = pd.read_csv('card_transdata.csv')
+# Features and target
+X = data.drop(columns=['fraud'])
+y = data['fraud']
+# Train/test split
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y,
+    test_size=0.3,
+    stratify=y,
+    random_state=42
+)
+# Initialize a gradient-boosting classifier with class imbalance handling
+model = HistGradientBoostingClassifier(
+    loss="log_loss",
+    class_weight="balanced",
+    learning_rate=0.05,
+    max_iter=200,
+    max_depth=8,
+    random_state=42
+)
+# Train on the training set
+model.fit(X_train, y_train)
+# Predict on the test set
+y_pred = model.predict(X_test)
+"""Comment out the following lines to skip evaluation in prod"""
+# print("\nTesting with all transactions...")
+# # Evaluate
+# print("Classification Report:")
+# print(classification_report(y_test, y_pred, digits=4))
+# print("\nConfusion Matrix:")
+# print(confusion_matrix(y_test, y_pred))
+# # Save and load the model using joblib
+def save_model(model, filename='fraud_model.pkl'):
+    """Saves the trained model to a file."""
+    joblib.dump(model, filename)
+    #print(f"Model saved to {filename}")
+save_model(model)
+def load_model(filename='fraud_model.pkl'):
+    """Loads the saved model from a file."""
+    model = joblib.load(filename)
+    #print(f"Model loaded from {filename}")
+    return model
+# Initialize LIME explainer on training data
+explainer = LimeTabularExplainer(
+    training_data=X_train.values,
+    feature_names=X_train.columns.tolist(),
+    class_names=['not_fraud', 'fraud'],
+    mode='classification'
+)
+def extract_top_features(single_row_df, top_n=3):
+    # Generate explanation for the 'fraud' class (label=1)
+    exp = explainer.explain_instance(
+        single_row_df.values[0],
+        lambda arr: model.predict_proba(
+            pd.DataFrame(arr, columns=X_train.columns.tolist())
+        ),
+        num_features=top_n
+    )
+    # Get list of (feature, weight) for the fraud prediction
+    feature_weights = exp.as_list(label=1)
+    # Format the top features into a string
+    formatted = "Transaction's top features:\n"
+    formatted += "\n".join(f"  - {feat}: weight {weight:.4f}" for feat, weight in feature_weights)
     return formatted