FarahMohsenSamy1 committed
Commit 88629d6 · verified · 1 parent: 3ae6a6f

Upload 3 files

Files changed (3):
  1. new_user.json +8 -0
  2. train.py +181 -0
  3. transactions.csv +0 -0
new_user.json ADDED
@@ -0,0 +1,8 @@
+{
+    "user_id": 500,
+    "liked_categories": [
+        "Cosmetics",
+        "HairCare",
+        "Food"
+    ]
+}
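
A minimal sketch (not part of this commit) of how new_user.json could be consumed as a cold-start profile; the load_new_user helper name is an assumption, not an API from this repository:

    import json

    def load_new_user(path="new_user.json"):
        # Hypothetical helper: parse the cold-start profile, which holds an
        # integer user_id and the categories the user reported liking
        with open(path) as f:
            profile = json.load(f)
        return profile["user_id"], profile["liked_categories"]

    user_id, liked = load_new_user()
    print(user_id, liked)  # 500 ['Cosmetics', 'HairCare', 'Food']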
train.py ADDED
@@ -0,0 +1,181 @@
+# Standard-library imports
+import datetime
+import fcntl  # Unix-only file locking, used when saving the checkpoint
+import os
+import random
+
+# Third-party imports
+import pandas as pd
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+from sklearn import preprocessing as pp
+from sklearn.model_selection import train_test_split
+from torch_geometric.nn.conv import MessagePassing
+from torch_geometric.utils import degree
+
+# Device configuration
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+# Load the transactions and keep only the columns needed for
+# collaborative filtering: user, rating, timestamp, item
+df = pd.read_csv('transactions.csv')
+df = df.rename(columns={'Customer_ID': 'userId', 'Rating': 'rating', 'Date': 'timestamp'})
+df_collab = df.drop(['Transaction_ID', 'Quantity', 'Price', 'Product_Category', 'Product_Name', 'Product_Brand'], axis=1)
+df_collab = df_collab[['userId', 'rating', 'timestamp', 'Item_ID']]
+df_collab = df_collab[df_collab['rating'] >= 3]  # ratings >= 3 count as positive interactions
+
+# Train-test split
+train, test = train_test_split(df_collab.values, test_size=0.2, random_state=16)
+train_df = pd.DataFrame(train, columns=df_collab.columns)
+test_df = pd.DataFrame(test, columns=df_collab.columns)
+
+# Encode users and items as contiguous integer indices
+le_user = pp.LabelEncoder()
+le_item = pp.LabelEncoder()
+train_df['user_id_idx'] = le_user.fit_transform(train_df['userId'].values)
+train_df['item_id_idx'] = le_item.fit_transform(train_df['Item_ID'].values)
+
+train_user_ids = train_df['userId'].unique()
+train_item_ids = train_df['Item_ID'].unique()
+
+# Keep only test interactions whose user and item appear in the training
+# set, then reuse the fitted encoders (.copy() avoids a SettingWithCopyWarning)
+test_df = test_df[
+    (test_df['userId'].isin(train_user_ids)) & (test_df['Item_ID'].isin(train_item_ids))
+].copy()
+test_df['user_id_idx'] = le_user.transform(test_df['userId'].values)
+test_df['item_id_idx'] = le_item.transform(test_df['Item_ID'].values)
+
+n_users = train_df['user_id_idx'].nunique()
+n_items = train_df['item_id_idx'].nunique()
+
+# Mini-batch sampler: for each sampled user, draw one positive item
+# (interacted with) and one negative item (never interacted with)
+def data_loader(data, batch_size, n_usr, n_itm):
+    def sample_neg(x):
+        while True:
+            neg_id = random.randint(0, n_itm - 1)
+            if neg_id not in x:
+                return neg_id
+
+    interacted_items_df = data.groupby('user_id_idx')['item_id_idx'].apply(list).reset_index()
+    indices = list(range(n_usr))
+
+    if n_usr < batch_size:
+        users = [random.choice(indices) for _ in range(batch_size)]
+    else:
+        users = random.sample(indices, batch_size)
+    users.sort()
+    users_df = pd.DataFrame(users, columns=['users'])
+
+    interacted_items_df = pd.merge(interacted_items_df, users_df, how='right', left_on='user_id_idx', right_on='users')
+    pos_items = interacted_items_df['item_id_idx'].apply(lambda x: random.choice(x)).values
+    neg_items = interacted_items_df['item_id_idx'].apply(lambda x: sample_neg(x)).values
+
+    # Item indices are offset by n_usr because users and items share a
+    # single embedding table / node-id space
+    return (
+        torch.LongTensor(list(users)).to(device),
+        torch.LongTensor(list(pos_items)).to(device) + n_usr,
+        torch.LongTensor(list(neg_items)).to(device) + n_usr
+    )
+
+# Graph construction: an undirected bipartite user-item graph with item
+# node ids offset by n_users
+u_t = torch.LongTensor(train_df.user_id_idx.values)
+i_t = torch.LongTensor(train_df.item_id_idx.values) + n_users
+train_edge_index = torch.stack((torch.cat([u_t, i_t]), torch.cat([i_t, u_t]))).to(device)
+
+# LightGCN convolution: symmetrically normalised neighbourhood
+# aggregation, with no feature transform and no nonlinearity
+class LightGCNConv(MessagePassing):
+    def __init__(self):
+        super().__init__(aggr='add')
+
+    def forward(self, x, edge_index):
+        from_, to_ = edge_index
+        deg = degree(to_, x.size(0), dtype=x.dtype)
+        deg_inv_sqrt = deg.pow(-0.5)
+        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
+        norm = deg_inv_sqrt[from_] * deg_inv_sqrt[to_]
+        return self.propagate(edge_index, x=x, norm=norm)
+
+    def message(self, x_j, norm):
+        return norm.view(-1, 1) * x_j
+
+# Recommendation model: a single embedding table for users and items,
+# propagated through num_layers LightGCN convolutions; the final
+# representation is the mean over all layers (including layer 0)
+class RecSysGNN(nn.Module):
+    def __init__(self, latent_dim, num_layers, num_users, num_items):
+        super().__init__()
+        self.embedding = nn.Embedding(num_users + num_items, latent_dim)
+        self.convs = nn.ModuleList(LightGCNConv() for _ in range(num_layers))
+        self.init_parameters()
+
+    def init_parameters(self):
+        nn.init.normal_(self.embedding.weight, std=0.1)
+
+    def forward(self, edge_index):
+        emb0 = self.embedding.weight
+        embs = [emb0]
+        emb = emb0
+        for conv in self.convs:
+            emb = conv(x=emb, edge_index=edge_index)
+            embs.append(emb)
+        return emb0, torch.mean(torch.stack(embs, dim=0), dim=0)
+
+    def encode_minibatch(self, users, pos_items, neg_items, edge_index):
+        emb0, out = self(edge_index)
+        return out[users], out[pos_items], out[neg_items], emb0[users], emb0[pos_items], emb0[neg_items]
+
+# Bayesian Personalised Ranking (BPR) loss, plus an L2 regularisation
+# term on the layer-0 embeddings of the sampled triples
+def compute_bpr_loss(users, users_emb, pos_emb, neg_emb, user_emb0, pos_emb0, neg_emb0):
+    reg_loss = (1 / 2) * (user_emb0.norm().pow(2) + pos_emb0.norm().pow(2) + neg_emb0.norm().pow(2)) / float(len(users))
+    pos_scores = torch.mul(users_emb, pos_emb).sum(dim=1)
+    neg_scores = torch.mul(users_emb, neg_emb).sum(dim=1)
+    bpr_loss = torch.mean(F.softplus(neg_scores - pos_scores))
+    return bpr_loss, reg_loss
+
+
+if __name__ == "__main__":
+    model = RecSysGNN(latent_dim=64, num_layers=3, num_users=n_users, num_items=n_items).to(device)
+    optimizer = optim.Adam(model.parameters(), lr=0.001)
+    reg_decay = 1e-4  # L2 weight on the regularisation term (LightGCN's default)
+
+    for epoch in range(10):
+        users, pos_items, neg_items = data_loader(train_df, batch_size=256, n_usr=n_users, n_itm=n_items)
+        optimizer.zero_grad()
+
+        users_emb, pos_emb, neg_emb, user_emb0, pos_emb0, neg_emb0 = model.encode_minibatch(users, pos_items, neg_items, train_edge_index)
+        loss, reg_loss = compute_bpr_loss(users, users_emb, pos_emb, neg_emb, user_emb0, pos_emb0, neg_emb0)
+        total_loss = loss + reg_decay * reg_loss
+
+        total_loss.backward()
+        optimizer.step()
+
+        print(f"Epoch {epoch + 1}: Loss = {loss.item():.4f}, Reg Loss = {reg_loss.item():.4f}")
+
+    # Save a timestamped checkpoint in a writable directory, holding an
+    # exclusive lock while writing
+    model_dir = "./models"  # saved inside the current working directory
+    os.makedirs(model_dir, exist_ok=True)
+
+    try:
+        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        model_path = os.path.join(model_dir, f"model_{timestamp}.pth")
+
+        with open(model_path, 'wb') as f:
+            fcntl.flock(f, fcntl.LOCK_EX)
+            torch.save(model.state_dict(), f)
+            fcntl.flock(f, fcntl.LOCK_UN)
+
+        print(f"Model saved: {model_path}")
+
+    except Exception as e:
+        print(f"Error saving model: {e}")
transactions.csv ADDED
The diff for this file is too large to render. See raw diff