Initial commit
- .gitattributes +1 -0
- app.py +198 -0
- card_transdata.csv +3 -0
- model.py +88 -0
- prompts.md +59 -0
- reason.py +104 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+card_transdata.csv filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,198 @@
import gradio as gr
from reason import assess_fraud

css = """
.app-title {
    margin: 1rem auto;
    text-align: center;
}

.outer-container {
    gap: 3rem;
}

.main-col-one, .main-col-two {
    gap: 3rem;
}

.input-elem-row {
    align-items: center;
    gap: 2rem;
}

.input-elem-header p {
    font-weight: 700;
    font-size: 1.15rem;
}

.input-elem-desc p {
    font-size: 0.9rem;
    opacity: 0.6;
}

.input-elem-col-one {
    gap: 0;
}

.custom-input-elem-one span {
    display: none;
}

.custom-input-elem-one input {
    border-radius: 6px !important;
}

.custom-input-elem-one input::-webkit-outer-spin-button,
.custom-input-elem-one input::-webkit-inner-spin-button {
    -webkit-appearance: none;
    margin: 0;
}

.custom-input-elem-one input[type=number] {
    -moz-appearance: textfield;
}

div:has(.custom-input-elem-one), div:has(.custom-input-two), .custom-input-elem-one, .custom-input-two {
    padding: 0;
    margin: 0;
    border: none;
    background: none;
}

.custom-input-two {
    display: flex;
    justify-content: center;
}

.custom-input-two input[type=checkbox] {
    height: 1.5rem;
    width: 1.5rem;
    border-width: 2px;
}

.button-row {
    margin: 3rem auto;
}

.fraud-button {
    font-weight: 700;
    border: none;
    padding: 0.5rem 1rem;
    border-radius: 10px;
    font-size: 1.15rem;
    width: 100%;
    max-width: 500px;
    display: block;
    margin: 0 auto;
    transition: 0.3s ease;
}

.fraud-button:hover,
.fraud-button:focus {
    outline: none;
    box-shadow: rgba(100, 100, 111, 0.2) 0px 7px 29px 0px;
}

@media screen and (max-width: 600px) {
    .fraud-button {
        width: 100%;
        max-width: 100%;
    }

    .custom-input-two {
        justify-content: flex-start !important;
    }
}

.output-box textarea {
    font-size: 1rem;
}
"""


with gr.Blocks(theme=gr.themes.Base(font=[gr.themes.GoogleFont("Rubik"), "Arial", "sans-serif"]), css=css) as demo:
    gr.Markdown("# AI-Powered Fraud Detection for Merchants & Analysts", elem_classes="app-title")
    with gr.Row(elem_classes="outer-container"):
        with gr.Column(elem_classes="main-col-one"):
            with gr.Row(elem_classes="input-elem-row"):
                with gr.Column(elem_classes="input-elem-col-one"):
                    gr.Markdown("Transaction Amount ($)", elem_classes="input-elem-header")
                    gr.Markdown("The total amount of the transaction in US dollars.", elem_classes="input-elem-desc")
                with gr.Column():
                    transactionAmount = gr.Number(value=None, elem_classes="custom-input-elem-one")

            with gr.Row(elem_classes="input-elem-row"):
                with gr.Column(elem_classes="input-elem-col-one"):
                    gr.Markdown("Customer Median Spend ($)", elem_classes="input-elem-header")
                    gr.Markdown("This customer's typical (median) purchase amount. Used to detect unusual spending.", elem_classes="input-elem-desc")
                with gr.Column():
                    customerMedianSpend = gr.Number(value=None, elem_classes="custom-input-elem-one")

            with gr.Row(elem_classes="input-elem-row"):
                with gr.Column(elem_classes="input-elem-col-one"):
                    gr.Markdown("Distance From Home (km)", elem_classes="input-elem-header")
                    gr.Markdown("How far the customer was from their registered address when the transaction occurred.", elem_classes="input-elem-desc")
                with gr.Column():
                    distanceFromHome = gr.Number(value=None, elem_classes="custom-input-elem-one")

            with gr.Row(elem_classes="input-elem-row"):
                with gr.Column(elem_classes="input-elem-col-one"):
                    gr.Markdown("Distance From Last Transaction (km)", elem_classes="input-elem-header")
                    gr.Markdown("Distance between this transaction and the customer's previous one, in kilometers. Helps detect impossible travel.", elem_classes="input-elem-desc")
                with gr.Column():
                    distanceFromLastTransaction = gr.Number(value=None, elem_classes="custom-input-elem-one")

        with gr.Column(elem_classes="main-col-two"):
            with gr.Row(elem_classes="input-elem-row"):
                with gr.Column(elem_classes="input-elem-col-one"):
                    gr.Markdown("Repeat Retailer", elem_classes="input-elem-header")
                    gr.Markdown("Has the customer made purchases from this merchant before?", elem_classes="input-elem-desc")
                with gr.Column():
                    repeatRetailer = gr.Checkbox(label="", elem_classes="custom-input-two")

            with gr.Row(elem_classes="input-elem-row"):
                with gr.Column(elem_classes="input-elem-col-one"):
                    gr.Markdown("Used Chip", elem_classes="input-elem-header")
                    gr.Markdown("Was the transaction made using the credit card's chip (EMV) rather than swipe or manual entry?", elem_classes="input-elem-desc")
                with gr.Column():
                    usedChip = gr.Checkbox(label="", elem_classes="custom-input-two")

            with gr.Row(elem_classes="input-elem-row"):
                with gr.Column(elem_classes="input-elem-col-one"):
                    gr.Markdown("Used PIN", elem_classes="input-elem-header")
                    gr.Markdown("Was a PIN entered during the transaction?", elem_classes="input-elem-desc")
                with gr.Column():
                    usedPin = gr.Checkbox(label="", elem_classes="custom-input-two")

            with gr.Row(elem_classes="input-elem-row"):
                with gr.Column(elem_classes="input-elem-col-one"):
                    gr.Markdown("Online Order", elem_classes="input-elem-header")
                    gr.Markdown("Was this transaction placed through an online store (e.g. e-commerce, app)?", elem_classes="input-elem-desc")
                with gr.Column():
                    onlineOrder = gr.Checkbox(label="", elem_classes="custom-input-two")

    with gr.Row(elem_classes="button-row"):
        checkFraud = gr.Button("Check for Fraud", elem_classes="fraud-button")

    with gr.Row():
        output_box = gr.Textbox(label="Output", lines=3, elem_classes="output-box")

    checkFraud.click(
        fn=assess_fraud,
        # The components are passed to assess_fraud positionally, so this list
        # must follow the parameter order of assess_fraud in reason.py.
        inputs=[
            distanceFromHome,
            distanceFromLastTransaction,
            transactionAmount,
            customerMedianSpend,
            repeatRetailer,
            usedChip,
            usedPin,
            onlineOrder
        ],
        outputs=output_box
    )


if __name__ == "__main__":
    demo.launch()
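A note on the wiring above: `gr.Button.click` hands the `inputs` components to `fn` positionally, so the list order has to mirror the `assess_fraud` signature in reason.py (hence the ordering comment in the click call). A minimal, self-contained sketch of that behavior, using a hypothetical `echo` function:

```python
import gradio as gr

def echo(first_value, second_value):
    # Receives the component values in the order they were listed in `inputs`
    return f"first={first_value}, second={second_value}"

with gr.Blocks() as sketch:
    first = gr.Number(label="First")
    second = gr.Number(label="Second")
    out = gr.Textbox(label="Result")
    gr.Button("Run").click(fn=echo, inputs=[first, second], outputs=out)  # order matters

# sketch.launch()  # would serve the toy demo locally, just like demo.launch() in app.py
```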
card_transdata.csv
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7013c329bae9ef0ef32d65dbeb095694f0c7cd6c00ff74b2d0087fa1c67b8717
size 76277977
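The three lines above are a Git LFS pointer, not the data itself; the roughly 76 MB CSV is fetched by LFS (e.g. with `git lfs pull`). A small sanity-check sketch for once the file is present, with the path and column names assumed from model.py and reason.py:

```python
import pandas as pd

# Path as referenced in model.py; adjust if the file lives elsewhere locally.
df = pd.read_csv("src/card_transdata.csv")

# Columns expected per model.py / reason.py: distance_from_home,
# distance_from_last_transaction, ratio_to_median_purchase_price,
# repeat_retailer, used_chip, used_pin_number, online_order, fraud
print(df.columns.tolist())
print(df["fraud"].value_counts(normalize=True))  # quick look at class imbalance
```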
model.py
ADDED
@@ -0,0 +1,88 @@
import pandas as pd
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import joblib
from lime.lime_tabular import LimeTabularExplainer

# Load data
data = pd.read_csv('src/card_transdata.csv')

# Features and target
X = data.drop(columns=['fraud'])
y = data['fraud']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    stratify=y,
    random_state=42
)

# Initialize a gradient-boosting classifier with class-imbalance handling
model = HistGradientBoostingClassifier(
    loss="log_loss",
    class_weight="balanced",
    learning_rate=0.05,
    max_iter=200,
    max_depth=8,
    random_state=42
)

# Train on the training set
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# The evaluation below is commented out for prod; uncomment it to evaluate locally.
# print("\nTesting with all transactions...")

# # Evaluate
# print("Classification Report:")
# print(classification_report(y_test, y_pred, digits=4))

# print("\nConfusion Matrix:")
# print(confusion_matrix(y_test, y_pred))


# Save and load the model using joblib
def save_model(model, filename='fraud_model.pkl'):
    """Saves the trained model to a file."""
    joblib.dump(model, filename)
    # print(f"Model saved to {filename}")

save_model(model)

def load_model(filename='fraud_model.pkl'):
    """Loads the saved model from a file."""
    model = joblib.load(filename)
    # print(f"Model loaded from {filename}")
    return model


# Initialize the LIME explainer on the training data
explainer = LimeTabularExplainer(
    training_data=X_train.values,
    feature_names=X_train.columns.tolist(),
    class_names=['not_fraud', 'fraud'],
    mode='classification'
)

def extract_top_features(single_row_df, top_n=3):
    # Generate an explanation for the 'fraud' class (label=1)
    exp = explainer.explain_instance(
        single_row_df.values[0],
        lambda arr: model.predict_proba(
            pd.DataFrame(arr, columns=X_train.columns.tolist())
        ),
        num_features=top_n
    )

    # Get the list of (feature, weight) pairs for the fraud prediction
    feature_weights = exp.as_list(label=1)
    # Format the top features into a string
    formatted = "Transaction's top features:\n"
    formatted += "\n".join(f"  - {feat}: weight {weight:.4f}" for feat, weight in feature_weights)
    return formatted
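A hedged usage sketch of the helpers defined above. Note that training runs at module scope, so importing `model` re-trains the classifier (and rewrites `fraud_model.pkl`) each time; the row used here is only illustrative.

```python
import pandas as pd
from model import load_model, extract_top_features  # importing model triggers training

clf = load_model()  # reads the fraud_model.pkl written by save_model(model)

# Explain a single transaction (first row of the dataset, minus the target)
row = pd.read_csv("src/card_transdata.csv").drop(columns=["fraud"]).iloc[0:1]
print("prediction:", clf.predict(row)[0])
print(extract_top_features(row, top_n=3))
```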
prompts.md
ADDED
@@ -0,0 +1,59 @@
# Fraud Detection Prompts


### Placeholder Prompt
```
f"Transaction classified as **{status}**.\n"
f"Top contributing factors according to the LIME module:\n{feat_str}\n\n"
"Please explain why and recommend next investigative steps."
```

### Prompt V1
```
"You are a professional fraud analyst assisting in reviewing a flagged transaction.\n"
f"The transaction is classified as **{status}**.\n"
f"The top contributing factors according to the LIME module:\n{feat_str}\n\n"
"Briefly explain why this transaction was flagged as such based on the top contributing features.\n"
"Assess the likelihood of fraud based on the features and their influence.\n"
"Recommend next investigative steps that a business user or fraud team should take.\n"
"Respond in a formal but concise tone. Your explanation should be understandable to both technical and non-technical users.\n"
```

### Prompt V2
*implements a light chain of thought*
```
"You are a professional fraud analyst assisting in reviewing a flagged transaction.\n"
f"The transaction is classified as **{status}**.\n"
f"The top contributing factors according to the LIME module:\n{feat_str}\n\n"
"Think step-by-step through the features and their weights to understand the model's reasoning.\n"
"Then:\n"
"Briefly explain why this transaction was flagged as such based on the top contributing features.\n"
"Assess the likelihood of fraud based on the features and their influence.\n"
"Recommend next investigative steps that a business user or fraud team should take.\n"
"Respond in a formal but concise tone. Your explanation should be understandable to both technical and non-technical users.\n"
```

### Explicit Chain of Thought
*add this to the prompt to perform verbose reasoning before making a decision*
```
"Walk through your reasoning step-by-step before reaching your conclusions. Show how each feature contributes to your fraud assessment.\n"
```

### Optional Guidance for Output Formatting
*add this to the prompt to format the LLM's output*
```
Format your response using **Markdown** as follows:

**Prediction**: FRAUD
**Likelihood of Fraud**: (Low / Moderate / High)

**Reasoning**:
- Bullet point 1
- Bullet point 2
- Bullet point 3

**Recommended Next Steps**:
- Step 1
- Step 2
- Step 3
```
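For reference, a minimal sketch (values illustrative only) of the placeholders these templates assume: `status` comes from the classifier's prediction and `feat_str` from `extract_top_features()` in model.py.

```python
status = "FRAUD"
feat_str = (
    "Transaction's top features:\n"
    "  - ratio_to_median_purchase_price > 4.03: weight 0.4213\n"
    "  - online_order > 0.50: weight 0.1187\n"
    "  - used_pin_number <= 0.00: weight 0.0971"
)

placeholder_prompt = (
    f"Transaction classified as **{status}**.\n"
    f"Top contributing factors according to the LIME module:\n{feat_str}\n\n"
    "Please explain why and recommend next investigative steps."
)
print(placeholder_prompt)
```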
reason.py
ADDED
@@ -0,0 +1,104 @@
from llama_cpp import Llama
import pandas as pd
from model import load_model, extract_top_features

# Lazily load the fraud classifier from src/model.py
fraud_model = None
def get_fraud_model():
    global fraud_model
    if fraud_model is None:
        fraud_model = load_model()
    return fraud_model


# Initialize the LLM
# using the GGUF model from Hugging Face
llm = Llama.from_pretrained(
    repo_id="lmstudio-community/Nemotron-Research-Reasoning-Qwen-1.5B-GGUF",
    filename="Nemotron-Research-Reasoning-Qwen-1.5B-Q4_K_M.gguf",
    verbose=False,
    n_ctx=131072,  # Match the training context length
    # n_gpu_layers=24  # Optional GPU acceleration
)

# Run the LLM reasoning
def llm_reason(prompt: str) -> str:
    output = llm.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": prompt
            }
        ]
    )

    return output["choices"][0]["message"]["content"]

# Wrapper that builds the context and calls the LLM
def build_and_call_llm(transaction_df: pd.DataFrame) -> str:
    # 1) Get a fraud prediction + top features
    model = get_fraud_model()
    pred = model.predict(transaction_df)[0]
    feature_contributions = extract_top_features(transaction_df, top_n=3)

    # 2) Assemble a minimal prompt for now
    status = "FRAUD" if pred == 1 else "NORMAL"
    prompt = (
        "You are a professional fraud analyst assisting in reviewing a flagged transaction.\n"
        f"The transaction is classified as **{status}**.\n"
        f"The top contributing factors and their weights:\n{feature_contributions}\n\n"
        "Think step-by-step through the features and their weights to understand the classifier's reasoning.\n"
        "Then:\n"
        "Explain why this transaction was flagged by each contributing factor, explaining why and how the feature contributes to the classification.\n"
        "Assess the likelihood of fraud based on the features and their influence, including why their influence is significant in terms of cause and effect.\n"
        "Recommend specific and impactful next investigative steps that a business user or fraud team should take (with ample detail) in real life, independent of the features.\n"
        "Respond in a formal and explanatory tone (don't be too concise). Your explanation should be understandable to both technical and non-technical users.\n"
        " Format your response using **Markdown** as follows:\n"
        "\n"
        " **Prediction**: FRAUD \n"
        " **Likelihood of Fraud**: (Low / Moderate / High) \n"
        "\n"
        " **Reasoning**: \n"
        " - Bullet point 1 \n"
        " - Bullet point 2 \n"
        " - Bullet point 3 \n"
        "\n"
        " **Recommended Next Steps**: \n"
        " - Step 1 \n"
        " - Step 2 \n"
        " - Step 3\n"
    )

    return llm_reason(prompt)



# ─── ENTRYPOINT ───────────────────────────────────────────────────────────────
def assess_fraud(distanceFromHome, distanceFromLastTransaction, transactionAmount, customerMedianSpend, repeatRetailer, usedChip, usedPin, onlineOrder):

    data = {
        "distance_from_home": distanceFromHome,
        "distance_from_last_transaction": distanceFromLastTransaction,
        "ratio_to_median_purchase_price": transactionAmount / customerMedianSpend,  # Ratio of the transaction amount to the customer's median purchase price (assumes customerMedianSpend > 0)
        "repeat_retailer": float(repeatRetailer),  # The checkbox values are booleans and must be converted to float to match the training dataset
        "used_chip": float(usedChip),
        "used_pin_number": float(usedPin),
        "online_order": float(onlineOrder),
    }
    df_row = pd.DataFrame([data])


    # Build the context and wait for the LLM's explanation
    explanation = build_and_call_llm(df_row)

    # The reasoning model emits its chain of thought before a closing </think> tag;
    # return only the final answer that follows it.
    parts = explanation.split('</think>', 1)

    if len(parts) > 1:
        after_think = parts[1].strip()
        return after_think
    else:
        return "No </think> tag found."

# if __name__ == "__main__":
#     df = pd.read_csv('src/card_transdata.csv').drop(columns=['fraud']).iloc[0:1]  # Data for testing
#     assess_fraud(df)
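A hedged usage sketch of the entrypoint (argument values are illustrative). Importing `reason` downloads and loads the GGUF model via `Llama.from_pretrained`, so the first call can take a while; `customerMedianSpend` must be positive because the ratio feature divides by it.

```python
from reason import assess_fraud

explanation = assess_fraud(
    distanceFromHome=12.4,            # km from the registered address
    distanceFromLastTransaction=0.8,  # km from the previous transaction
    transactionAmount=450.00,         # USD
    customerMedianSpend=60.00,        # USD; must be > 0
    repeatRetailer=False,
    usedChip=False,
    usedPin=False,
    onlineOrder=True,
)
print(explanation)
```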