keanteng committed on
Commit
cb74148
·
verified ·
1 Parent(s): e7f2d19

Upload classifier.py

Browse files
Files changed (1) hide show
  1. src/classifier.py +145 -0
src/classifier.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import joblib
3
+
4
# Integer codes for every categorical feature. Built once at import time
# instead of on every call.
_ENCODINGS = {
    'person_gender': {
        'female': 0,
        'male': 1,
    },
    'person_education': {
        'associate': 0,
        'bachelor': 1,
        'doctorate': 2,
        'high school': 3,
        'master': 4,
    },
    'person_home_ownership': {
        'mortgage': 0,
        'other': 1,
        'own': 2,
        'rent': 3,
    },
    'previous_loan_defaults_on_file': {
        'no': 0,
        'yes': 1,
        # Already-encoded integer flags are passed through unchanged.
        0: 0,
        1: 1,
    },
    'loan_intent': {
        'debt_consolidation': 0,
        'education': 1,
        'home_improvement': 2,
        'medical': 3,
        'personal': 4,
        'venture': 5,
    },
}


def get_encoding(category, value):
    """Return the integer code of a categorical ``value`` for ``category``.

    String values are matched case-insensitively and with surrounding
    whitespace ignored, so ``"Male"`` and ``" male "`` both map to the code
    of ``"male"``.

    Args:
        category: Name of the categorical feature, e.g. ``'person_gender'``
            or ``'loan_intent'``.
        value: The raw category label. For
            ``'previous_loan_defaults_on_file'`` the integers ``0`` and ``1``
            are accepted as well as ``'no'`` / ``'yes'``.

    Returns:
        The integer code, or ``None`` when either the category or the value
        is unknown.
    """
    if isinstance(value, str):
        # All table keys are lower-case without padding.
        value = value.strip().lower()
    return _ENCODINGS.get(category, {}).get(value)
41
+
42
def classify_loan(
    person_age: float,
    person_gender: str,
    person_education: str,
    person_income: float,
    person_emp_exp: float,
    person_home_ownership: str,
    loan_amnt: float,
    loan_intent: str,
    loan_int_rate: float,
    loan_percent_income: float,
    cb_person_cred_hist_length: float,
    credit_score: float,
    previous_loan_defaults_on_file: str
) -> str:
    """Classify a loan application from the borrower's details (mock API).

    The numeric features are standardised with the per-column ``mean`` and
    ``std`` read from ``data/means_stds.csv``, the categorical features are
    converted to integer codes with ``get_encoding``, and the resulting row is
    passed to the model stored at ``model/random_forest_model.pkl``. Both
    paths are relative to the current working directory.

    Args:
        person_age: The age of the borrower.
        person_gender: Gender of the borrower. Either `male` or `female`.
        person_education: The education level of the borrower. Either
            `associate`, `bachelor`, `doctorate`, `master` or `high school`.
        person_income: The income of the borrower.
        person_emp_exp: The years of employment experience of the borrower.
        person_home_ownership: The home ownership status of the borrower.
            Either `mortgage`, `other`, `own` or `rent`.
        loan_amnt: The amount of loan requested.
        loan_intent: The intent of the loan. Either `debt_consolidation`,
            `education`, `home_improvement`, `medical`, `personal` or
            `venture`.
        loan_int_rate: The interest rate of the loan.
        loan_percent_income: The loan amount as a percentage of annual income.
        cb_person_cred_hist_length: The length of the credit history of the
            borrower.
        credit_score: The credit score of the borrower.
        previous_loan_defaults_on_file: The indicator of previous loan
            defaults. Either `yes` or `no`.

    Returns:
        A human-readable message stating whether the loan application has
        been approved or rejected.

    Raises:
        ValueError: If a categorical argument is not one of the supported
            values listed above.
    """
    # Collect the raw inputs into a single-row dataframe.
    input_data = {
        "person_age": person_age,
        "person_gender": person_gender,
        "person_education": person_education,
        "person_income": person_income,
        "person_emp_exp": person_emp_exp,
        "person_home_ownership": person_home_ownership,
        "loan_amnt": loan_amnt,
        "loan_intent": loan_intent,
        "loan_int_rate": loan_int_rate,
        "loan_percent_income": loan_percent_income,
        "cb_person_cred_hist_length": cb_person_cred_hist_length,
        "credit_score": credit_score,
        "previous_loan_defaults_on_file": previous_loan_defaults_on_file,
    }
    input_df = pd.DataFrame([input_data])
    print("### This is the input data:")
    print(input_df.head())

    # Standardise the numeric features: (x - mean) / std, using the stored
    # statistics (presumably computed on the training set -- confirm).
    means_stds = pd.read_csv("data/means_stds.csv").set_index("column")
    numeric_columns = [
        "person_age", "person_income", "person_emp_exp", "loan_amnt",
        "loan_int_rate", "loan_percent_income", "cb_person_cred_hist_length",
        "credit_score",
    ]
    for column in numeric_columns:
        mean = means_stds.loc[column, "mean"]
        std = means_stds.loc[column, "std"]
        input_df[column] = (input_df[column] - mean) / std

    # Convert the categorical features to their integer codes. An unknown
    # label is rejected here rather than reaching the model as a missing
    # value and silently producing a prediction.
    categorical_columns = [
        "person_gender", "person_education", "person_home_ownership",
        "loan_intent", "previous_loan_defaults_on_file",
    ]
    for column in categorical_columns:
        encoded = get_encoding(column, input_data[column])
        if encoded is None:
            raise ValueError(
                f"Unsupported value {input_data[column]!r} for '{column}'."
            )
        input_df[column] = encoded

    print("### This is the processed input data: ")
    print(input_df.head())

    # Load the classifier (the file name suggests a random forest).
    # NOTE(review): joblib.load unpickles the file, which can execute
    # arbitrary code -- only ever point this at a trusted model artifact.
    classifier = joblib.load("model/random_forest_model.pkl")

    # Reorder the columns to match the training data.
    ordered_columns = [
        "person_gender",
        "person_education",
        "person_home_ownership",
        "loan_intent",
        "previous_loan_defaults_on_file",
        "person_age",
        "person_income",
        "person_emp_exp",
        "loan_amnt",
        "loan_int_rate",
        "loan_percent_income",
        "cb_person_cred_hist_length",
        "credit_score",
    ]
    input_df = input_df[ordered_columns]

    # A predicted class of 1 is reported as an approval; anything else is
    # reported as a rejection.
    prediction = classifier.predict(input_df)
    if prediction[0] == 1:
        return "Your loan application has been approved."
    return "Your loan application has been rejected."