ckoozzzu committed on
Commit
dbacacd
·
verified ·
1 Parent(s): ea2d102

Upload folder using huggingface_hub

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
MLBaseModel.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
class MLBaseModel(nn.Module):
    """
    Fully connected regression network with three hidden layers (256, 128, 64 units).

    The final layer emits two values per input row: sale_price and days_on_market.
    """

    def __init__(self, input_dim):
        """
        Build the four linear layers.
        :param input_dim: number of input features per row
        """
        super().__init__()
        # Layer names and creation order are kept as-is so saved state dicts still load
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 2)  # Output 2 values: sale_price and days_on_market

    def forward(self, x):
        """
        Run the forward pass.
        :param x: input tensor whose last dimension equals `input_dim`
        :return: tensor whose last dimension is 2 (sale_price, days_on_market)
        """
        hidden = x
        # ReLU follows every hidden layer; the output layer stays linear
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = torch.relu(layer(hidden))
        return self.fc4(hidden)
MLBaseModelDriver.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import sys
3
+ import pandas as pd
4
+ from typing import TypedDict, Optional, Tuple
5
+ import datetime
6
+ import math
7
+ import importlib.util
8
+ from huggingface_hub import hf_hub_download
9
+ import pickle
10
+
11
+
12
+ """
13
+ Data container class representing the data shape of the synapse coming into `run_inference`
14
+ """
15
+
16
+
17
class ProcessedSynapse(TypedDict):
    """
    Shape of the synapse dictionary passed to `run_inference`; every value may be None.
    """

    # Identifiers
    id: Optional[str]
    nextplace_id: Optional[str]
    property_id: Optional[str]
    listing_id: Optional[str]

    # Location
    address: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]

    # Listing details
    price: Optional[float]
    beds: Optional[int]
    baths: Optional[float]
    sqft: Optional[int]
    lot_size: Optional[int]
    year_built: Optional[int]
    days_on_market: Optional[int]

    # Coordinates
    latitude: Optional[float]
    longitude: Optional[float]

    # Remaining listing metadata
    property_type: Optional[str]
    last_sale_date: Optional[str]
    hoa_dues: Optional[float]
    query_date: Optional[str]
39
+
40
+
41
+ """
42
+ This class must do two things
43
+ 1) The constructor must load the model
44
+ 2) This class must implement a method called `run_inference` that takes the input data and returns a tuple
45
+ of float, str representing the predicted sale price and the predicted sale date.
46
+ """
47
+
48
+
49
class MLBaseModelDriver:
    """
    Driver for the ML base model.

    The constructor downloads and loads the model, and `run_inference` converts one synapse
    into a tuple of (predicted sale price, predicted sale date).
    """

    # Hugging Face repository that hosts the weights, the pickled artifacts and the model class
    MODEL_REPO_ID = "Nickel5HF/NextPlace"

    # Columns fed to the network, in the order used to build the input tensor
    FEATURE_COLUMNS = ['beds', 'sqft', 'property_type', 'price']

    def __init__(self):
        self.model, self.label_encoder, self.scaler = self.load_model()

    def load_model(self) -> Tuple[torch.nn.Module, object, object]:
        """
        load the model and model parameters
        :return: model, label encoder, and scaler
        """
        print("Loading model...")
        model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
        model_class = self._import_model_class(model_class_file)

        model = model_class(input_dim=len(self.FEATURE_COLUMNS))
        # Only a state dict is needed, so restrict unpickling to tensors (weights_only=True) and
        # map to CPU so a checkpoint saved on a GPU machine still loads on a CPU-only host.
        state_dict = torch.load(model_file, map_location="cpu", weights_only=True)
        model.load_state_dict(state_dict)
        model.eval()

        # Load additional artifacts
        # SECURITY NOTE: pickle.load executes arbitrary code from the file. These artifacts come
        # from the remote repository, so they are only as trustworthy as that repository.
        with open(scaler_file, 'rb') as f:
            scaler = pickle.load(f)

        # The label encoders are returned to the caller; nothing in this class uses them.
        with open(label_encoders_file, 'rb') as f:
            label_encoders = pickle.load(f)

        print("Model Loaded.")
        return model, label_encoders, scaler

    def _download_model_files(self) -> Tuple[str, str, str, str]:
        """
        download files from hugging face
        :return: local paths of the model weights, scaler, label encoders and model class file
        """
        repo_id = self.MODEL_REPO_ID

        # Download the model files from the Hugging Face Hub
        model_file = hf_hub_download(repo_id=repo_id, filename="model_files/real_estate_model.pth")
        scaler_file = hf_hub_download(repo_id=repo_id, filename="model_files/scaler.pkl")
        label_encoders_file = hf_hub_download(repo_id=repo_id, filename="model_files/label_encoder.pkl")
        model_class_file = hf_hub_download(repo_id=repo_id, filename="MLBaseModel.py")

        return model_file, scaler_file, label_encoders_file, model_class_file

    def _import_model_class(self, model_class_file):
        """
        import the module at `model_class_file` and return its `MLBaseModel` class
        :param model_class_file: file path to the model class
        :return: the `MLBaseModel` class object (not an instance)
        :raises ImportError: if no import spec/loader can be built for the file
        :raises AttributeError: if the module does not define `MLBaseModel`
        """
        # Reference docs here: https://docs.python.org/3/library/importlib.html#importlib.util.spec_from_loader
        module_name = "MLBaseModel"
        spec = importlib.util.spec_from_file_location(module_name, model_class_file)
        if spec is None or spec.loader is None:
            raise ImportError(f"Cannot build an import spec for {model_class_file!r}")
        model_module = importlib.util.module_from_spec(spec)
        # Register before executing so the module is importable by name afterwards
        sys.modules[module_name] = model_module
        spec.loader.exec_module(model_module)

        if not hasattr(model_module, "MLBaseModel"):
            raise AttributeError("The module does not contain a class named 'MLBaseModel'")
        return model_module.MLBaseModel

    def run_inference(self, input_data: "ProcessedSynapse") -> Tuple[float, str]:
        """
        run inference using the MLBaseModel
        :param input_data: synapse from the validator
        :return: the predicted sale price and date (formatted as YYYY-MM-DD)
        :raises ValueError: if the synapse has no usable `price`
        """
        input_tensor = self._preprocess_input(input_data)

        with torch.no_grad():
            prediction = self.model(input_tensor)

        # The first (only) row holds the two outputs: sale price, then days on market
        predicted_sale_price, predicted_days_on_market = prediction[0].tolist()
        predicted_days_on_market = math.floor(predicted_days_on_market)
        predicted_sale_date = self._sale_date_predictor(
            input_data.get('days_on_market'), predicted_days_on_market
        )

        return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")

    def _sale_date_predictor(self, days_on_market: Optional[int], predicted_days_on_market: int) -> datetime.date:
        """
        convert predicted days on market to a sale date
        :param days_on_market: number of days this home has been on the market; None is treated as 0
        :param predicted_days_on_market: the predicted number of days for this home on the market
        :return: the predicted sale date
        """
        elapsed_days = 0 if days_on_market is None else days_on_market
        today = datetime.date.today()
        if elapsed_days < predicted_days_on_market:
            return today + datetime.timedelta(days=predicted_days_on_market - elapsed_days)
        # The listing has already outlived the prediction: predict a sale tomorrow
        return today + datetime.timedelta(days=1)

    def _preprocess_input(self, data: "ProcessedSynapse") -> torch.Tensor:
        """
        preprocess the input for inference
        :param data: synapse from the validator
        :return: float32 tensor with one row holding beds, sqft, property_type and price
        :raises ValueError: if `price` is missing or not numeric
        """
        default_beds = 3
        default_sqft = 1500.0
        default_property_type = '6'

        # reindex keeps only the feature columns and creates any that the synapse lacks
        frame = pd.DataFrame([data]).reindex(columns=self.FEATURE_COLUMNS)

        # Coerce to numeric before filling so a None never leaves an object-dtype column behind
        frame['beds'] = pd.to_numeric(frame['beds'], errors='coerce').fillna(default_beds)
        frame['sqft'] = pd.to_numeric(frame['sqft'], errors='coerce').fillna(default_sqft)

        property_type = frame['property_type'].astype(object)
        property_type = property_type.where(property_type.notna(), default_property_type)
        frame['property_type'] = property_type.astype(int)

        frame['price'] = pd.to_numeric(frame['price'], errors='coerce')
        if frame['price'].isna().any():
            # No default exists for price; fail here instead of propagating NaN through the model
            raise ValueError("input data must contain a numeric 'price'")

        frame[['sqft', 'price']] = self.scaler.transform(frame[['sqft', 'price']])
        features = frame[self.FEATURE_COLUMNS]
        return torch.tensor(features.to_numpy(dtype='float32'), dtype=torch.float32)
README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # NextPlace
2
+ - Models for the NextPlace subnet
StatisticalBaseModel.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple, TypedDict, Optional
2
+ import datetime
3
+
4
+
5
class ProcessedSynapse(TypedDict):
    """
    Shape of the synapse dictionary passed to `run_inference`; every value may be None.
    """

    # Identifiers
    id: Optional[str]
    nextplace_id: Optional[str]
    property_id: Optional[str]
    listing_id: Optional[str]

    # Location
    address: Optional[str]
    city: Optional[str]
    state: Optional[str]
    zip_code: Optional[str]

    # Listing details
    price: Optional[float]
    beds: Optional[int]
    baths: Optional[float]
    sqft: Optional[int]
    lot_size: Optional[int]
    year_built: Optional[int]
    days_on_market: Optional[int]

    # Coordinates
    latitude: Optional[float]
    longitude: Optional[float]

    # Remaining listing metadata
    property_type: Optional[str]
    last_sale_date: Optional[str]
    hoa_dues: Optional[float]
    query_date: Optional[str]
    market: Optional[str]
28
+
29
+
30
+ class StatisticalBaseModel:
31
+
32
+ def __init__(self):
33
+ self._load_model()
34
+
35
+ def _load_model(self):
36
+ """
37
+ Perform any actions needed to load the model.
38
+ EX: Establish API connections, download an ML model for inference, etc...
39
+ """
40
+ print("Loading model...")
41
+ # Optional model loading
42
+ print("Model loaded.")
43
+
44
+ def _get_average_for_market(self, market: str) -> int:
45
+ """
46
+ Get the average days on market for a house in a given market
47
+ :param market: the housing market
48
+ :return: the average days on market
49
+ """
50
+ # You probably want to update this based on the current season. Houses sell faster in the summer.
51
+ # Add more logic for other housing markets!
52
+ if market == 'San Francisco':
53
+ return 23
54
+ elif market == 'Los Angeles':
55
+ return 68
56
+ elif market == 'Seattle':
57
+ return 27
58
+ elif market == 'Austin':
59
+ return 78
60
+ elif market == 'Houston':
61
+ return 73
62
+ elif market == 'Chicago':
63
+ return 25
64
+ elif market == 'New York':
65
+ return 20
66
+ elif market == 'Denver':
67
+ return 24
68
+ return 34
69
+
70
+
71
+ def _sale_date_predictor(self, input_data: ProcessedSynapse):
72
+ """
73
+ Calculate the expected sale date based on the national average
74
+ :param days_on_market: number of days this house has been on the market
75
+ :return: the predicted sale date, based on the national average of 34 days
76
+ """
77
+ if 'days_on_market' not in input_data:
78
+ return datetime.date.today() + datetime.timedelta(days=1)
79
+
80
+ if 'market' not in input_data:
81
+ average = 34
82
+
83
+ else:
84
+ average = self._get_average_for_market(input_data['market'])
85
+
86
+ days_on_market = input_data['days_on_market']
87
+ if days_on_market < average:
88
+ days_until_sale = average - days_on_market
89
+ sale_date = datetime.date.today() + datetime.timedelta(days=days_until_sale)
90
+ return sale_date
91
+ else:
92
+ return datetime.date.today() + datetime.timedelta(days=1)
93
+
94
+ def _get_price_multiplier(self, market: str) -> float:
95
+ """
96
+ Calculate the price multiplier based on the market
97
+ :param market: the marked the house is in
98
+ :return: the multiplier for the predicted price
99
+ """
100
+ # You may want to add more logic to check zipcode for more precise price multipliers
101
+ # Add more logic for other housing markets!
102
+ if market == 'San Francisco':
103
+ return 1.18 # 18% above listing
104
+ elif market == 'Los Angeles':
105
+ return 1.2 # 22% above listing
106
+ elif market == 'Seattle':
107
+ return 1.13 # 13% above listing
108
+ elif market == 'Austin':
109
+ return 1.11 # 11% above listing
110
+ elif market == 'Houston':
111
+ return 1.15 # 15% above listing
112
+ elif market == 'Chicago':
113
+ return 1.12 # 12% above listing
114
+ elif market == 'New York':
115
+ return 1.05 # 5% above listing
116
+ elif market == 'Denver':
117
+ return 1.11 # 11% above listing
118
+ return 1.0
119
+
120
+ def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]:
121
+ """
122
+ Predict the sale price and sale date for the house represented by `input_data`
123
+ :param input_data: a formatted Synapse from the validator, representing a currently listed house
124
+ :return: the predicted sale price and predicted sale date for this home
125
+ """
126
+ listing_price = float(input_data['price']) if 'price' in input_data else 1.0
127
+ sale_multiplier = self._get_price_multiplier(input_data['market']) if 'market' in input_data else 1.0
128
+ predicted_sale_price = listing_price * sale_multiplier
129
+ predicted_sale_date = self._sale_date_predictor(input_data)
130
+ predicted_sale_date = predicted_sale_date.strftime("%Y-%m-%d")
131
+ return predicted_sale_price, predicted_sale_date
model_files/label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fd02a7b03d8323c065fbb3a010f92710b96d568fe18f48922de1b106d0928e9
3
+ size 306
model_files/real_estate_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ead0cc482ac184bf4aff9500dcfd288304a243200621b09574713fb20fa6dbe
3
+ size 2079339
model_files/scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c321473fc77b93773052027f5dc5e2ae03c51b3b9a39c093c674444103caabc
3
+ size 600
real_estate_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39811330ff68ab47c4838bceacc0fb8e87c0120282506d03f1119992996062a0
3
+ size 173496