import random
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import degree
from sklearn import preprocessing as pp
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
import datetime
import os
import logging
import requests
from datasets import load_dataset, Dataset
from huggingface_hub import HfApi
import gc
from typing import Tuple, Dict, List
import json
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logging.info(f"Using device: {device}")
cache_base = "/app/cache"
os.makedirs(f"{cache_base}/huggingface", exist_ok=True)
os.makedirs(f"{cache_base}/transformers", exist_ok=True)
os.makedirs(f"{cache_base}/datasets", exist_ok=True)
# Set all possible Hugging Face cache environment variables
os.environ['HF_HOME'] = f"{cache_base}/huggingface"
os.environ['TRANSFORMERS_CACHE'] = f"{cache_base}/transformers"
os.environ['HF_DATASETS_CACHE'] = f"{cache_base}/datasets"
os.environ['HUGGINGFACE_HUB_CACHE'] = f"{cache_base}/huggingface"
os.environ['HF_HUB_CACHE'] = f"{cache_base}/huggingface"
API_BASE_URL = os.getenv("API_BASE_URL", "https://ssc-grad.up.railway.app/api/transaction/json")
REPO_ID = "FarahMohsenSamy1/Transactions"
LOCAL_PATH = "/tmp/transactions.csv"
METADATA_PATH = "/tmp/dataset_metadata.json"
UNIQUE_KEYS = ["Transaction_ID", "Customer_ID", "Item_ID"]
MODEL_DIR = "/app/models"
LATEST_MODEL_PATH = os.path.join(MODEL_DIR, "latest_model.txt")
ENCODERS_PATH = os.path.join(MODEL_DIR, "encoders.pkl")
LATENT_DIM = 64
NUM_LAYERS = 3
LEARNING_RATE = 0.001
BATCH_SIZE = 64
NUM_EPOCHS = 100
MIN_RATING = 3.0
REG_LAMBDA = 1e-4
# Evaluation parameters
TOP_K = [5, 10, 20, 30] # Top-K for precision and recall calculation
EVALUATION_THRESHOLD = 3.5 # Rating threshold for binary relevance
try:
api = HfApi(token=os.getenv("HF_TOKEN"))
logging.info("HuggingFace API initialized successfully")
except Exception as e:
logging.error(f"Failed to initialize HF API: {e}")
api = None
def save_metadata(metadata: dict):
"""Save dataset metadata locally."""
with open(METADATA_PATH, 'w') as f:
json.dump(metadata, f, indent=2)
logging.info(f"Metadata saved to {METADATA_PATH}")
def load_metadata() -> dict:
"""Load dataset metadata if it exists."""
if os.path.exists(METADATA_PATH):
with open(METADATA_PATH, 'r') as f:
return json.load(f)
return {}
def clean_dataframe_for_upload(df: pd.DataFrame) -> pd.DataFrame:
"""Clean and standardize DataFrame for HuggingFace upload."""
df_clean = df.copy()
# Convert all columns to appropriate types
for col in df_clean.columns:
if df_clean[col].dtype == 'object':
            # Try to convert to numeric if possible, otherwise keep as a clean string.
            # errors='raise' keeps the fallback branch reachable (errors='ignore' is
            # deprecated and would have silently returned the column unchanged).
            try:
                df_clean[col] = pd.to_numeric(df_clean[col], errors='raise')
            except (ValueError, TypeError):
                # Conversion failed: normalize to a string column without NaNs
                df_clean[col] = df_clean[col].fillna('').astype(str)
# Ensure ID columns are strings to avoid conversion issues
id_columns = ['Transaction_ID', 'Customer_ID', 'Item_ID', 'userId', 'itemId']
for col in id_columns:
if col in df_clean.columns:
df_clean[col] = df_clean[col].astype(str)
# Handle any NaN values
df_clean = df_clean.fillna('')
logging.info(f"Cleaned DataFrame: {len(df_clean)} rows, {len(df_clean.columns)} columns")
logging.info(f"Column types: {dict(df_clean.dtypes)}")
return df_clean
def fetch_api_data(api_url: str) -> pd.DataFrame:
"""Fetch data from the API and return as a DataFrame."""
try:
logging.info(f"Fetching data from {api_url}")
response = requests.get(api_url, timeout=300)
if response.status_code == 200:
data = response.json()
new_transactions = data.get("data", {}).get("newTransactions", [])
rating_updates = data.get("data", {}).get("ratingUpdates", [])
combined = new_transactions + rating_updates
if combined:
df = pd.DataFrame(combined)
logging.info(f"Fetched {len(df)} records from API")
return df
else:
logging.info("No new data found in API response")
return pd.DataFrame()
else:
logging.error(f"Failed to fetch from {api_url} — Status code: {response.status_code}")
return pd.DataFrame()
except Exception as e:
logging.error(f"Error fetching from {api_url}: {e}")
return pd.DataFrame()
def load_existing_dataset() -> pd.DataFrame:
"""Load existing dataset from HuggingFace."""
try:
logging.info(f"Loading existing dataset from {REPO_ID}")
dataset = load_dataset(REPO_ID, split='train')
df = dataset.to_pandas()
logging.info(f"Loaded {len(df)} existing records from HuggingFace")
return df
except Exception as e:
logging.warning(f"Could not load existing dataset: {e}")
return pd.DataFrame()
def upload_dataset_to_hf(df: pd.DataFrame) -> bool:
"""Upload dataset to HuggingFace with proper data type handling."""
try:
if api is None:
logging.error("HuggingFace API not initialized")
return False
# Clean the DataFrame before upload
df_clean = clean_dataframe_for_upload(df)
# Create dataset with explicit feature types to avoid conversion issues
try:
dataset = Dataset.from_pandas(df_clean)
except Exception as e:
logging.error(f"Error creating dataset from pandas: {e}")
# Try with string conversion for problematic columns
for col in df_clean.columns:
if col in ['Item_ID', 'Customer_ID', 'Transaction_ID', 'userId', 'itemId']:
df_clean[col] = df_clean[col].astype(str)
dataset = Dataset.from_pandas(df_clean)
# Push to hub
dataset.push_to_hub(REPO_ID, token=os.getenv("HF_TOKEN"))
logging.info(f"Successfully uploaded {len(df_clean)} records to {REPO_ID}")
# Update metadata
metadata = {
"last_updated": datetime.datetime.now().isoformat(),
"total_records": len(df_clean),
"columns": list(df_clean.columns),
"data_types": {col: str(dtype) for col, dtype in df_clean.dtypes.items()}
}
save_metadata(metadata)
return True
except Exception as e:
logging.error(f"Failed to upload dataset to HuggingFace: {e}")
# Save locally as backup
try:
df.to_csv(LOCAL_PATH, index=False)
logging.info(f"Dataset saved locally as backup: {LOCAL_PATH}")
except Exception as backup_e:
logging.error(f"Failed to save backup: {backup_e}")
return False
def sync_and_update_dataset() -> Tuple[bool, int, pd.DataFrame]:
"""Sync and update dataset from API and existing data."""
logging.info("Starting dataset sync and update process...")
# Fetch new data from API
df_new = fetch_api_data(API_BASE_URL)
# Load existing dataset
df_existing = load_existing_dataset()
if df_new.empty and df_existing.empty:
logging.warning("No data available from API or existing dataset")
return False, 0, pd.DataFrame()
# Combine datasets
if df_existing.empty:
df_combined = df_new.copy()
new_records = len(df_new)
elif df_new.empty:
df_combined = df_existing.copy()
new_records = 0
else:
df_combined = pd.concat([df_existing, df_new], ignore_index=True)
        # Remove duplicates based on whichever unique keys are actually present
        original_len = len(df_combined)
        dedup_keys = [k for k in UNIQUE_KEYS if k in df_combined.columns]
        df_combined = df_combined.drop_duplicates(subset=dedup_keys or None, keep="last").reset_index(drop=True)
new_records = len(df_combined) - len(df_existing)
logging.info(f"Removed {original_len - len(df_combined)} duplicate records")
# Save locally first
try:
df_combined.to_csv(LOCAL_PATH, index=False)
logging.info(f"Saved combined dataset locally: {len(df_combined)} total records")
except Exception as e:
logging.error(f"Failed to save locally: {e}")
# Try to upload to HuggingFace, but don't fail if it doesn't work
upload_success = upload_dataset_to_hf(df_combined)
# Return success as True if we have data, regardless of upload status
return True, new_records, df_combined
def prepare_data_for_training(df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
"""Prepare data for collaborative filtering training."""
logging.info("Preparing data for training...")
# Standardize column names
column_mapping = {
'Customer_ID': 'userId',
'rating': 'rating',
'Timestamp': 'timestamp',
'Item_ID': 'itemId'
}
# Rename columns if they exist
for old_col, new_col in column_mapping.items():
if old_col in df.columns:
df = df.rename(columns={old_col: new_col})
# Select relevant columns for collaborative filtering
required_cols = ['userId', 'itemId', 'rating']
optional_cols = ['timestamp']
available_cols = [col for col in required_cols if col in df.columns]
available_cols.extend([col for col in optional_cols if col in df.columns])
df_collab = df[available_cols].copy()
    # Clean data: convert types before filtering so the rating threshold works
    # even when the API returns ratings as strings
    df_collab = df_collab.dropna(subset=required_cols)
    df_collab['userId'] = df_collab['userId'].astype(str)
    df_collab['itemId'] = df_collab['itemId'].astype(str)
    df_collab['rating'] = pd.to_numeric(df_collab['rating'], errors='coerce')
    # Remove rows with invalid ratings, then keep only sufficiently high ratings
    df_collab = df_collab.dropna(subset=['rating'])
    df_collab = df_collab[df_collab['rating'] >= MIN_RATING]
logging.info(f"Prepared {len(df_collab)} records for training")
# Check minimum requirements
n_users = df_collab['userId'].nunique()
n_items = df_collab['itemId'].nunique()
stats = {
'n_users': n_users,
'n_items': n_items,
'n_interactions': len(df_collab),
'sparsity': 1 - (len(df_collab) / (n_users * n_items))
}
logging.info(f"Dataset stats: {stats}")
if n_users < 2 or n_items < 2:
raise ValueError(f"Insufficient data for training: {n_users} users, {n_items} items")
return df_collab, stats
def create_train_test_split(df: pd.DataFrame, test_size: float = 0.2) -> Tuple[pd.DataFrame, pd.DataFrame]:
"""Create train-test split ensuring proper data distribution."""
logging.info("Creating train-test split...")
if len(df) < 10:
# For very small datasets, use most data for training
test_size = min(0.1, (len(df) - 1) / len(df))
train_df, test_df = train_test_split(df, test_size=test_size, random_state=42, stratify=None)
# Ensure test set only contains users and items present in training set
train_users = set(train_df['userId'].unique())
train_items = set(train_df['itemId'].unique())
test_df = test_df[
(test_df['userId'].isin(train_users)) &
(test_df['itemId'].isin(train_items))
]
logging.info(f"Train set: {len(train_df)} interactions")
logging.info(f"Test set: {len(test_df)} interactions")
return train_df.reset_index(drop=True), test_df.reset_index(drop=True)
def encode_users_items(train_df: pd.DataFrame, test_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame, dict]:
"""Encode users and items to sequential indices."""
logging.info("Encoding users and items...")
le_user = pp.LabelEncoder()
le_item = pp.LabelEncoder()
# Create copies to avoid modifying original data
train_df = train_df.copy()
test_df = test_df.copy()
# Fit encoders on training data
train_df['user_id_idx'] = le_user.fit_transform(train_df['userId'].values)
train_df['item_id_idx'] = le_item.fit_transform(train_df['itemId'].values)
# Transform test data
test_df['user_id_idx'] = le_user.transform(test_df['userId'].values)
test_df['item_id_idx'] = le_item.transform(test_df['itemId'].values)
encoders = {
'user_encoder': le_user,
'item_encoder': le_item,
'n_users': len(le_user.classes_),
'n_items': len(le_item.classes_)
}
# Save encoders
import pickle
os.makedirs(os.path.dirname(ENCODERS_PATH), exist_ok=True)
with open(ENCODERS_PATH, 'wb') as f:
pickle.dump(encoders, f)
logging.info(f"Encoded {encoders['n_users']} users and {encoders['n_items']} items")
return train_df, test_df, encoders
# Data loader function
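# Each batch pairs one observed (positive) and one unobserved (negative) item per
# sampled user, the pairwise setup BPR training expects. The returned item indices
# are shifted by n_usr because users and items share one embedding table in RecSysGNN.
# Note: sample_neg assumes every sampled user has at least one item they have not
# interacted with; otherwise its rejection loop would not terminate.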
def data_loader(data: pd.DataFrame, batch_size: int, n_usr: int, n_itm: int):
"""Generate batches for training with negative sampling."""
def sample_neg(user_items: set) -> int:
while True:
neg_id = random.randint(0, n_itm - 1)
if neg_id not in user_items:
return neg_id
# Group items by user
user_items_dict = data.groupby('user_id_idx')['item_id_idx'].apply(set).to_dict()
# Sample users
available_users = list(user_items_dict.keys())
if len(available_users) < batch_size:
users = random.choices(available_users, k=batch_size)
else:
users = random.sample(available_users, batch_size)
pos_items = []
neg_items = []
for user in users:
user_items = user_items_dict[user]
pos_item = random.choice(list(user_items))
neg_item = sample_neg(user_items)
pos_items.append(pos_item)
neg_items.append(neg_item)
return (
torch.LongTensor(users).to(device),
torch.LongTensor(pos_items).to(device) + n_usr,
torch.LongTensor(neg_items).to(device) + n_usr
)
# LightGCN Convolution Layer
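# Each layer applies the LightGCN propagation rule (no feature transform, no
# non-linearity): e_i^(k+1) = sum over j in N(i) of e_j^(k) / sqrt(|N(i)| * |N(j)|),
# i.e. neighbor aggregation with symmetric degree normalization.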
class LightGCNConv(MessagePassing):
def __init__(self):
super().__init__(aggr='add')
def forward(self, x, edge_index):
from_, to_ = edge_index
deg = degree(to_, x.size(0), dtype=x.dtype)
deg_inv_sqrt = deg.pow(-0.5)
deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
norm = deg_inv_sqrt[from_] * deg_inv_sqrt[to_]
return self.propagate(edge_index, x=x, norm=norm)
def message(self, x_j, norm):
return norm.view(-1, 1) * x_j
# Recommendation System Model
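# The final node representations are the uniform average of the layer-wise
# embeddings (layer 0 through num_layers), LightGCN's layer-combination step.
# Dropout after each propagation step is an addition on top of vanilla LightGCN.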
class RecSysGNN(nn.Module):
def __init__(self, latent_dim: int, num_layers: int, num_users: int, num_items: int, dropout: float = 0.1):
super(RecSysGNN, self).__init__()
self.num_users = num_users
self.num_items = num_items
self.latent_dim = latent_dim
self.num_layers = num_layers
self.embedding = nn.Embedding(num_users + num_items, latent_dim)
self.convs = nn.ModuleList([LightGCNConv() for _ in range(num_layers)])
self.dropout = nn.Dropout(dropout)
self.init_parameters()
def init_parameters(self):
nn.init.normal_(self.embedding.weight, std=0.1)
def forward(self, edge_index):
emb0 = self.embedding.weight
embs = [emb0]
emb = emb0
for conv in self.convs:
emb = conv(x=emb, edge_index=edge_index)
emb = self.dropout(emb)
embs.append(emb)
return emb0, torch.mean(torch.stack(embs, dim=0), dim=0)
def encode_minibatch(self, users, pos_items, neg_items, edge_index):
emb0, final_emb = self(edge_index)
return (
final_emb[users], final_emb[pos_items], final_emb[neg_items],
emb0[users], emb0[pos_items], emb0[neg_items]
)
def predict(self, users, items, edge_index):
"""Predict ratings for user-item pairs."""
with torch.no_grad():
_, final_emb = self(edge_index)
user_emb = final_emb[users]
item_emb = final_emb[items]
predictions = torch.mul(user_emb, item_emb).sum(dim=1)
return predictions
# Compute BPR Loss with regularization
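# BPR loss over a batch of (user, positive item, negative item) triples:
#   L = mean(softplus(s_neg - s_pos))
#       + (reg_lambda / 2) * (||U0||_F^2 + ||P0||_F^2 + ||N0||_F^2) / batch_size
# where s = <user_emb, item_emb> on the propagated embeddings and U0/P0/N0 are the
# layer-0 embeddings of the sampled users and items. Note that
# softplus(s_neg - s_pos) == -log(sigmoid(s_pos - s_neg)), the standard BPR form.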
def compute_bpr_loss(users, users_emb, pos_emb, neg_emb, user_emb0, pos_emb0, neg_emb0, reg_lambda=REG_LAMBDA):
reg_loss = reg_lambda * (1/2) * (
user_emb0.norm().pow(2) +
pos_emb0.norm().pow(2) +
neg_emb0.norm().pow(2)
) / float(len(users))
pos_scores = torch.mul(users_emb, pos_emb).sum(dim=1)
neg_scores = torch.mul(users_emb, neg_emb).sum(dim=1)
bpr_loss = torch.mean(F.softplus(neg_scores - pos_scores))
return bpr_loss + reg_loss, reg_loss
def compute_precision_recall_at_k(model: nn.Module, test_df: pd.DataFrame, train_df: pd.DataFrame,
encoders: dict, edge_index: torch.Tensor, k_values: List[int] = TOP_K) -> Dict[str, float]:
"""
Compute precision and recall at K for the recommendation model.
Args:
model: Trained recommendation model
test_df: Test dataset
train_df: Training dataset (to exclude known interactions)
encoders: User and item encoders
edge_index: Graph edge index
k_values: List of K values to compute metrics for
Returns:
Dictionary with precision and recall metrics
"""
model.eval()
logging.info("Computing precision and recall metrics...")
n_users = encoders['n_users']
n_items = encoders['n_items']
# Create test user-item ground truth
test_user_items = test_df.groupby('user_id_idx')['item_id_idx'].apply(set).to_dict()
train_user_items = train_df.groupby('user_id_idx')['item_id_idx'].apply(set).to_dict()
# Filter test users who have enough interactions
valid_test_users = [user for user, items in test_user_items.items() if len(items) >= min(k_values)]
if not valid_test_users:
logging.warning("No valid test users found for evaluation")
return {f"precision@{k}": 0.0 for k in k_values} | {f"recall@{k}": 0.0 for k in k_values}
metrics = {f"precision@{k}": [] for k in k_values}
metrics.update({f"recall@{k}": [] for k in k_values})
with torch.no_grad():
_, final_emb = model(edge_index)
for user_idx in valid_test_users:
# Get ground truth items for this user
true_items = test_user_items[user_idx]
# Get training items to exclude from recommendations
train_items = train_user_items.get(user_idx, set())
# Get user embedding
user_tensor = torch.LongTensor([user_idx]).to(device)
user_emb = final_emb[user_tensor]
# Compute scores for all items
item_indices = torch.arange(n_items).to(device) + n_users
item_embs = final_emb[item_indices]
scores = torch.mm(user_emb, item_embs.t()).squeeze()
# Mask out training items (set their scores to very low values)
for train_item in train_items:
scores[train_item] = float('-inf')
# Get top-K recommendations for each K value
for k in k_values:
                # Clamp k so topk never exceeds the number of items in small catalogs
                _, top_k_items = torch.topk(scores, min(k, scores.size(0)))
top_k_items = top_k_items.cpu().numpy()
# Calculate precision and recall
relevant_items = len(set(top_k_items) & true_items)
precision = relevant_items / k if k > 0 else 0.0
recall = relevant_items / len(true_items) if len(true_items) > 0 else 0.0
metrics[f"precision@{k}"].append(precision)
metrics[f"recall@{k}"].append(recall)
# Calculate average metrics
avg_metrics = {}
for metric_name, values in metrics.items():
if values:
avg_metrics[metric_name] = np.mean(values)
else:
avg_metrics[metric_name] = 0.0
# Add F1 scores
for k in k_values:
precision = avg_metrics[f"precision@{k}"]
recall = avg_metrics[f"recall@{k}"]
if precision + recall > 0:
f1 = 2 * (precision * recall) / (precision + recall)
else:
f1 = 0.0
avg_metrics[f"f1@{k}"] = f1
return avg_metrics
def evaluate_model_ratings(model: nn.Module, test_df: pd.DataFrame, encoders: dict, edge_index: torch.Tensor) -> Dict[str, float]:
"""
Evaluate model performance on rating prediction task.
Args:
model: Trained recommendation model
test_df: Test dataset with ratings
encoders: User and item encoders
edge_index: Graph edge index
Returns:
Dictionary with rating prediction metrics
"""
model.eval()
logging.info("Evaluating rating prediction performance...")
n_users = encoders['n_users']
with torch.no_grad():
# Get predictions for test set
user_indices = torch.LongTensor(test_df['user_id_idx'].values).to(device)
item_indices = torch.LongTensor(test_df['item_id_idx'].values).to(device) + n_users
predictions = model.predict(user_indices, item_indices, edge_index)
predictions = predictions.cpu().numpy()
# Get actual ratings
actual_ratings = test_df['rating'].values
# Convert to binary for precision/recall (above threshold = relevant)
binary_actual = (actual_ratings >= EVALUATION_THRESHOLD).astype(int)
binary_pred = (predictions >= np.mean(predictions)).astype(int)
# Calculate metrics
try:
precision = precision_score(binary_actual, binary_pred, zero_division=0)
recall = recall_score(binary_actual, binary_pred, zero_division=0)
f1 = f1_score(binary_actual, binary_pred, zero_division=0)
except Exception as e:
logging.warning(f"Error calculating rating-based metrics: {e}")
precision = recall = f1 = 0.0
# Calculate RMSE and MAE
mse = np.mean((predictions - actual_ratings) ** 2)
rmse = np.sqrt(mse)
mae = np.mean(np.abs(predictions - actual_ratings))
return {
'rating_precision': precision,
'rating_recall': recall,
'rating_f1': f1,
'rmse': rmse,
'mae': mae
}
def train_model(train_df: pd.DataFrame, test_df: pd.DataFrame, encoders: dict) -> Tuple[nn.Module, Dict[str, float]]:
"""Train the recommendation model and return evaluation metrics."""
logging.info("Starting model training...")
n_users = encoders['n_users']
n_items = encoders['n_items']
# Create edge index for graph
u_t = torch.LongTensor(train_df['user_id_idx'].values)
i_t = torch.LongTensor(train_df['item_id_idx'].values) + n_users
train_edge_index = torch.stack((
torch.cat([u_t, i_t]),
torch.cat([i_t, u_t])
)).to(device)
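    # The interaction graph is an undirected bipartite graph: each (user, item)
    # pair contributes both a user->item and an item->user edge, and item node ids
    # are offset by n_users so users and items share one node index space.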
# Initialize model
model = RecSysGNN(
latent_dim=LATENT_DIM,
num_layers=NUM_LAYERS,
num_users=n_users,
num_items=n_items
).to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)
model.train()
best_loss = float('inf')
for epoch in range(NUM_EPOCHS):
epoch_loss = 0.0
epoch_reg_loss = 0.0
num_batches = 0
# Multiple batches per epoch
for _ in range(max(1, len(train_df) // BATCH_SIZE)):
users, pos_items, neg_items = data_loader(train_df, BATCH_SIZE, n_users, n_items)
optimizer.zero_grad()
users_emb, pos_emb, neg_emb, user_emb0, pos_emb0, neg_emb0 = model.encode_minibatch(
users, pos_items, neg_items, train_edge_index
)
loss, reg_loss = compute_bpr_loss(
users, users_emb, pos_emb, neg_emb,
user_emb0, pos_emb0, neg_emb0
)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
epoch_reg_loss += reg_loss.item()
num_batches += 1
# Clean up tensors
del users, pos_items, neg_items, users_emb, pos_emb, neg_emb
scheduler.step()
avg_loss = epoch_loss / num_batches
avg_reg_loss = epoch_reg_loss / num_batches
if avg_loss < best_loss:
best_loss = avg_loss
if epoch % 10 == 0:
logging.info(f"Epoch {epoch + 1}/{NUM_EPOCHS}: Loss = {avg_loss:.4f}, Reg Loss = {avg_reg_loss:.4f}")
# Memory cleanup
gc.collect()
if torch.cuda.is_available():
torch.cuda.empty_cache()
logging.info(f"Training completed. Best loss: {best_loss:.4f}")
# Evaluate model after training
logging.info("Starting model evaluation...")
# Compute precision and recall at K
ranking_metrics = compute_precision_recall_at_k(
model, test_df, train_df, encoders, train_edge_index, TOP_K
)
# Compute rating prediction metrics
rating_metrics = evaluate_model_ratings(
model, test_df, encoders, train_edge_index
)
# Combine all metrics
all_metrics = {**ranking_metrics, **rating_metrics}
# Log all metrics
logging.info("=== Model Evaluation Results ===")
for metric_name, value in all_metrics.items():
logging.info(f"{metric_name}: {value:.4f}")
return model, all_metrics
def save_model(model: nn.Module, encoders: dict, metrics: Dict[str, float]):
"""Save the trained model, encoders, and evaluation metrics."""
os.makedirs(MODEL_DIR, exist_ok=True)
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
model_path = os.path.join(MODEL_DIR, f"model_{timestamp}.pth")
# Save model state dict with metrics
torch.save({
'model_state_dict': model.state_dict(),
'model_config': {
'latent_dim': LATENT_DIM,
'num_layers': NUM_LAYERS,
'num_users': encoders['n_users'],
'num_items': encoders['n_items']
},
'encoders': encoders,
'metrics': metrics,
'timestamp': timestamp
}, model_path)
# Update latest model path
with open(LATEST_MODEL_PATH, 'w') as f:
f.write(model_path)
# Save metrics separately for easy access
metrics_path = os.path.join(MODEL_DIR, f"metrics_{timestamp}.json")
with open(metrics_path, 'w') as f:
json.dump(metrics, f, indent=2)
logging.info(f"Model saved: {model_path}")
logging.info(f"Metrics saved: {metrics_path}")
logging.info(f"Latest model path updated: {LATEST_MODEL_PATH}")
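# Minimal reload sketch (commented out), mirroring the checkpoint layout above.
# The torch.load arguments may need adjusting to the installed PyTorch version,
# since the checkpoint contains pickled sklearn encoders:
#   checkpoint = torch.load(model_path, map_location=device, weights_only=False)
#   model = RecSysGNN(**checkpoint['model_config']).to(device)
#   model.load_state_dict(checkpoint['model_state_dict'])
#   encoders = checkpoint['encoders']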
def print_evaluation_summary(metrics: Dict[str, float], encoders: dict):
"""Print a comprehensive evaluation summary."""
print("\n" + "="*60)
print("MODEL EVALUATION SUMMARY")
print("="*60)
print(f"\nDataset Information:")
print(f" - Number of users: {encoders['n_users']}")
print(f" - Number of items: {encoders['n_items']}")
print(f" - Total interactions: {encoders['n_users'] * encoders['n_items']}")
print(f"\nRanking Metrics (Top-K Recommendations):")
for k in TOP_K:
precision = metrics.get(f"precision@{k}", 0)
recall = metrics.get(f"recall@{k}", 0)
f1 = metrics.get(f"f1@{k}", 0)
print(f" Top-{k}:")
print(f" - Precision: {precision:.4f}")
print(f" - Recall: {recall:.4f}")
print(f" - F1-Score: {f1:.4f}")
print(f"\nRating Prediction Metrics:")
print(f" - Precision: {metrics.get('rating_precision', 0):.4f}")
print(f" - Recall: {metrics.get('rating_recall', 0):.4f}")
print(f" - F1-Score: {metrics.get('rating_f1', 0):.4f}")
print(f" - RMSE: {metrics.get('rmse', 0):.4f}")
print(f" - MAE: {metrics.get('mae', 0):.4f}")
print("\n" + "="*60)
def main():
"""Main training pipeline with evaluation."""
try:
logging.info("=== Starting Enhanced Training Pipeline with Evaluation ===")
# Step 1: Sync and update dataset
sync_success, new_records, df_complete = sync_and_update_dataset()
if not sync_success or df_complete.empty:
logging.error("Dataset sync failed or no data available. Aborting training.")
return
logging.info(f"Dataset sync completed. New records: {new_records}, Total records: {len(df_complete)}")
# Step 2: Prepare data for training
df_collab, stats = prepare_data_for_training(df_complete)
if len(df_collab) < 10:
logging.warning(f"Insufficient data for meaningful training: {len(df_collab)} interactions")
return
# Step 3: Create train-test split
train_df, test_df = create_train_test_split(df_collab)
if len(test_df) == 0:
logging.warning("No test data available for evaluation")
return
# Step 4: Encode users and items
train_df, test_df, encoders = encode_users_items(train_df, test_df)
# Step 5: Train model and get evaluation metrics
model, metrics = train_model(train_df, test_df, encoders)
# Step 6: Save model with metrics
save_model(model, encoders, metrics)
# Step 7: Print comprehensive evaluation summary
print_evaluation_summary(metrics, encoders)
logging.info("=== Training Pipeline Completed Successfully ===")
logging.info(f"Final model performance summary:")
logging.info(f" - Users: {encoders['n_users']}, Items: {encoders['n_items']}")
logging.info(f" - Best Precision@10: {metrics.get('precision@10', 0):.4f}")
logging.info(f" - Best Recall@10: {metrics.get('recall@10', 0):.4f}")
logging.info(f" - RMSE: {metrics.get('rmse', 0):.4f}")
except Exception as e:
logging.error(f"Training pipeline failed: {e}")
import traceback
logging.error(f"Full traceback: {traceback.format_exc()}")
raise
def load_and_evaluate_saved_model(model_path: str = None) -> Dict[str, float]:
"""
Load a saved model and return its evaluation metrics.
Args:
model_path: Path to the saved model. If None, loads the latest model.
Returns:
Dictionary with evaluation metrics
"""
if model_path is None:
if os.path.exists(LATEST_MODEL_PATH):
with open(LATEST_MODEL_PATH, 'r') as f:
model_path = f.read().strip()
else:
raise FileNotFoundError("No saved model found")
# Load model checkpoint
    # weights_only=False is needed on newer PyTorch releases because the checkpoint
    # stores pickled sklearn encoders alongside the state dict
    checkpoint = torch.load(model_path, map_location=device, weights_only=False)
if 'metrics' in checkpoint:
logging.info("Loaded evaluation metrics from saved model:")
for metric_name, value in checkpoint['metrics'].items():
logging.info(f" {metric_name}: {value:.4f}")
return checkpoint['metrics']
else:
logging.warning("No evaluation metrics found in saved model")
return {}
if __name__ == "__main__":
main()