Ivanrs committed on
Commit e75d4ed · verified · 1 Parent(s): ae5898f

Upload 15 files

.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ example_imgs/Section_Va_72845-3-18.png filter=lfs diff=lfs merge=lfs -text
+ example_imgs/TypeIa_LaosN°15_Image21-25.png filter=lfs diff=lfs merge=lfs -text
+ example_imgs/TypeIVa2_N47583_Notteb-11.png filter=lfs diff=lfs merge=lfs -text
+ example_imgs/TypeIVd_Sect_LC3373-65.png filter=lfs diff=lfs merge=lfs -text
+ example_imgs/typIVc_IVbsectbis-43.png filter=lfs diff=lfs merge=lfs -text
example_imgs/72222-SectionIVa+WK maj_0009-60.png ADDED
example_imgs/Section_Va_72845-3-18.png ADDED

Git LFS Details

  • SHA256: 77e446c7239e0d9dc169a90a4f1c2642eb77638a461ae317470f88a00cb036ba
  • Pointer size: 131 Bytes
  • Size of remote file: 128 kB
example_imgs/TypeIVa2_N47583_Notteb-11.png ADDED

Git LFS Details

  • SHA256: 1ca75687d778e60f8c5b65cbf785505cbf25bc12f6bcffdf7e9af881426e9243
  • Pointer size: 131 Bytes
  • Size of remote file: 116 kB
example_imgs/TypeIVd_Sect_LC3373-65.png ADDED

Git LFS Details

  • SHA256: e607af6d6285c7d79d1506fe39f9f3a851dcd9a0a6593df357f00b61ba07ebb2
  • Pointer size: 131 Bytes
  • Size of remote file: 122 kB
example_imgs/TypeIa_LaosN°15_Image21-25.png ADDED

Git LFS Details

  • SHA256: c91b28a8d90256bc09442bd3b62751ea241a3ab9522398ba2e4f7d642d76de5a
  • Pointer size: 131 Bytes
  • Size of remote file: 122 kB
example_imgs/typIVc_IVbsectbis-43.png ADDED

Git LFS Details

  • SHA256: cbae5243d74e2f9c0156c804773ad9fe14c62d9b374c9798c5fee95b7bdddf38
  • Pointer size: 131 Bytes
  • Size of remote file: 141 kB
models/Daudon_MIX/best_autoencoder_Daudon_MIX.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b63d58b9d42c9de2b912d2cbd770c1b1b93c591965e44e6d1479ee0caa01007
+ size 123579888
models/Daudon_SEC/best_autoencoder_Daudon_SEC.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:687f67252659ffbdde997adb689b24d80b8f77a4baedca2d52cdadbd4d30fa65
+ size 123579888
models/Daudon_SUR/best_autoencoder_Daudon_SUR.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e969a304b220177169ff5e46668110cb62fdd4752ffdf4ecb84bbf50d68bff61
+ size 123579888
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ torch>=2.0.0
+ torchvision>=0.15.0
+ flwr>=1.6.0
+ numpy>=1.24.0
+ Pillow>=9.5.0
+ matplotlib>=3.7.0
+ scikit-learn>=1.3.0
+ tqdm>=4.65.0
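These pins are minimum versions only; a typical setup (an assumption, not part of this upload) is a fresh virtual environment followed by pip install -r requirements.txt. Note that gradio, seaborn, and scipy are imported by simple_gradio_app.py below but are not pinned here; scipy comes in transitively with scikit-learn, while gradio and seaborn must be installed separately.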
simple_anomaly_detector.py ADDED
@@ -0,0 +1,214 @@
+ """
+ Simple Anomaly Detector using Reconstruction Error
+ A minimal implementation for testing corruption intensity using autoencoder reconstruction error
+ """
+
+ import torch
+ import torch.nn as nn
+ import numpy as np
+ from PIL import Image
+ import torchvision.transforms as transforms
+ from typing import Union
+ import random
+
+ from models import Autoencoder
+ from utils.data_utils import ImageCorruption
+ import config
+
+
+ def apply_corruption(image_tensor: torch.Tensor, corruption_type: str = 'random') -> torch.Tensor:
+     """
+     Simple function to apply corruption to an image tensor
+
+     Args:
+         image_tensor: Input image tensor (C, H, W)
+         corruption_type: Type of corruption ('noise', 'blur', 'brightness', 'contrast', 'random')
+
+     Returns:
+         Corrupted image tensor
+     """
+     # Create corruption object with 100% probability to ensure corruption is applied
+     corruptor = ImageCorruption(corruption_prob=1.0)
+
+     if corruption_type == 'noise':
+         return corruptor.gaussian_noise(image_tensor.clone())
+     elif corruption_type == 'blur':
+         return corruptor.blur(image_tensor.clone())
+     elif corruption_type == 'brightness':
+         return corruptor.brightness_change(image_tensor.clone())
+     elif corruption_type == 'contrast':
+         return corruptor.contrast_change(image_tensor.clone())
+     elif corruption_type == 'random':
+         return corruptor.apply_random_corruption(image_tensor.clone())
+     else:
+         raise ValueError(f"Unknown corruption type: {corruption_type}")
+
+
+ class SimpleAnomalyDetector:
+     """Simple anomaly detector based on reconstruction error"""
+
+     def __init__(self, model_path: str):
+         """
+         Initialize the detector with a trained autoencoder
+
+         Args:
+             model_path: Path to the trained autoencoder (.pth file)
+         """
+         self.device = torch.device(config.DEVICE)
+         self.model = self._load_model(model_path)
+         self.criterion = nn.MSELoss()
+
+         # Image preprocessing - simplified and more robust
+         self.transform = transforms.Compose([
+             transforms.Resize((config.IMAGE_SIZE, config.IMAGE_SIZE)),
+             transforms.ToTensor(),
+             transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                                  std=[0.229, 0.224, 0.225])
+         ])
+
+         print(f"✅ Anomaly detector ready! Using device: {self.device}")
+         print(f"📏 Image size: {config.IMAGE_SIZE}x{config.IMAGE_SIZE}")
+
+     def _load_model(self, model_path: str) -> Autoencoder:
+         """Load the trained autoencoder model"""
+         print(f"📥 Loading model from {model_path}")
+
+         # Load checkpoint (weights_only=False for compatibility with saved metadata)
+         checkpoint = torch.load(model_path, map_location=self.device, weights_only=False)
+
+         # Create model with same architecture
+         model = Autoencoder(
+             input_channels=config.CHANNELS,
+             latent_dim=config.LATENT_DIM
+         )
+
+         # Load trained weights
+         model.load_state_dict(checkpoint['model_state_dict'])
+         model.to(self.device)
+         model.eval()
+
+         return model
+
+     def calculate_reconstruction_error(self, image: Union[str, Image.Image, torch.Tensor]) -> float:
+         """
+         Calculate reconstruction error for a single image
+
+         Args:
+             image: Can be:
+                 - String path to image file
+                 - PIL Image object
+                 - PyTorch tensor (C, H, W) or (1, C, H, W)
+
+         Returns:
+             Reconstruction error as a float (higher = more anomalous)
+         """
+         # Get image size - handle both tuple and integer formats
+         if isinstance(config.IMAGE_SIZE, tuple):
+             target_size = config.IMAGE_SIZE  # (256, 256)
+         else:
+             target_size = (config.IMAGE_SIZE, config.IMAGE_SIZE)
+
+         # Convert input to tensor
+         if isinstance(image, str):
+             # Load from file path
+             try:
+                 image_pil = Image.open(image).convert('RGB')
+                 # Resize the image properly
+                 image_pil = image_pil.resize(target_size, Image.LANCZOS)
+                 image_tensor = transforms.ToTensor()(image_pil)
+                 # Apply normalization
+                 normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+                 image_tensor = normalize(image_tensor).unsqueeze(0)  # Add batch dimension
+             except Exception as e:
+                 raise ValueError(f"Error loading image from {image}: {e}")
+
+         elif isinstance(image, Image.Image):
+             # PIL Image
+             try:
+                 image_pil = image.convert('RGB')
+                 image_pil = image_pil.resize(target_size, Image.LANCZOS)
+                 image_tensor = transforms.ToTensor()(image_pil)
+                 normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+                 image_tensor = normalize(image_tensor).unsqueeze(0)
+             except Exception as e:
+                 raise ValueError(f"Error processing PIL Image: {e}")
+
+         elif isinstance(image, torch.Tensor):
+             # PyTorch tensor
+             if image.dim() == 3:  # (C, H, W)
+                 image_tensor = image.unsqueeze(0)  # Add batch dimension
+             elif image.dim() == 4:  # (1, C, H, W)
+                 image_tensor = image
+             else:
+                 raise ValueError(f"Unexpected tensor dimensions: {image.shape}")
+         else:
+             raise ValueError(f"Unsupported image type: {type(image)}")
+
+         # Move to device
+         image_tensor = image_tensor.to(self.device)
+
+         # Calculate reconstruction error
+         with torch.no_grad():
+             reconstructed, _ = self.model(image_tensor)
+             error = self.criterion(reconstructed, image_tensor)
+
+         return error.item()
+
+
+ def test_detector_example():
+     """Example usage of the simple anomaly detector"""
+
+     # You need to specify the path to your trained model
+     model_path = "models/All_Datasets_MIX/best_autoencoder_All_Datasets_MIX.pth"  # Change this!
+
+     try:
+         # Initialize detector
+         detector = SimpleAnomalyDetector(model_path)
+
+         # Test with some images from your dataset
+         from utils.data_utils import create_global_test_loader
+
+         # Get a test loader
+         test_loader = create_global_test_loader(
+             datasets=["Michel Daudon (w256 1k v1)", "Jonathan El-Beze (w256 1k v1)"],
+             subversions=["MIX"]
+         )
+
+         print("\n🧪 Testing reconstruction errors:")
+         print("=" * 50)
+
+         # Test a few images
+         for i, (images, labels) in enumerate(test_loader):
+             if i >= 3:  # Test only first 3 batches
+                 break
+
+             for j in range(min(2, images.size(0))):  # Test 2 images per batch
+                 clean_image = images[j]
+
+                 # Test clean image
+                 clean_error = detector.calculate_reconstruction_error(clean_image)
+
+                 # Test corrupted versions
+                 corrupted_noise = apply_corruption(clean_image, 'noise')
+                 corrupted_blur = apply_corruption(clean_image, 'blur')
+
+                 noise_error = detector.calculate_reconstruction_error(corrupted_noise)
+                 blur_error = detector.calculate_reconstruction_error(corrupted_blur)
+
+                 print(f"\nImage {i*2 + j + 1} (Class: {labels[j]}):")
+                 print(f"   Clean: {clean_error:.6f}")
+                 print(f"   Noise corrupted: {noise_error:.6f} (x{noise_error/clean_error:.2f})")
+                 print(f"   Blur corrupted: {blur_error:.6f} (x{blur_error/clean_error:.2f})")
+
+         print(f"\n💡 Usage tip: Higher reconstruction error = more anomalous/corrupted")
+         print(f"   You can set a threshold (e.g., 0.01) above which images are considered anomalous")
+
+     except FileNotFoundError:
+         print(f"❌ Model file not found: {model_path}")
+         print("   Please update the model_path variable with your actual model file")
+     except Exception as e:
+         print(f"❌ Error: {e}")
+
+
+ if __name__ == "__main__":
+     test_detector_example()
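A minimal usage sketch for the detector above. Hedged: it assumes the repo's config and models modules resolve (config.py and the Autoencoder definition are not part of this upload) and that config.IMAGE_SIZE is 256; the checkpoint and example image are files from this commit. As in test_detector_example, tensor inputs are scored without ImageNet normalization.

from PIL import Image
import torchvision.transforms as T

from simple_anomaly_detector import SimpleAnomalyDetector, apply_corruption

detector = SimpleAnomalyDetector("models/Daudon_MIX/best_autoencoder_Daudon_MIX.pth")

# Score a clean example image; higher reconstruction error = more anomalous
clean_error = detector.calculate_reconstruction_error("example_imgs/Section_Va_72845-3-18.png")

# Corrupt a tensor version of the same image and compare
clean = Image.open("example_imgs/Section_Va_72845-3-18.png").convert('RGB')
tensor = T.ToTensor()(clean.resize((256, 256)))  # assumes config.IMAGE_SIZE == 256
noisy_error = detector.calculate_reconstruction_error(apply_corruption(tensor, 'noise'))

print(f"clean: {clean_error:.6f}  noisy: {noisy_error:.6f}")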
simple_gradio_app.py ADDED
@@ -0,0 +1,357 @@
+ """
+ Simple Gradio Application for Anomaly Detection Testing
+ Shows embedding analysis instead of reconstructed images
+ """
+
+ import gradio as gr
+ import torch
+ import numpy as np
+ from PIL import Image
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from scipy import stats
+ import io
+ import base64
+ from simple_anomaly_detector import SimpleAnomalyDetector
+ from image_corruption_utils import corrupt_image
+
+
+ # Global variables to store models
+ models = {
+     "Daudon_MIX": "models/Daudon_MIX/best_autoencoder_Daudon_MIX.pth",
+     "Daudon_SEC": "models/Daudon_SEC/best_autoencoder_Daudon_SEC.pth",
+     "Daudon_SUR": "models/Daudon_SUR/best_autoencoder_Daudon_SUR.pth"
+ }
+
+ current_detector = None
+ current_model_name = None
+
+
+ def load_model(model_name):
+     """Load the selected model"""
+     global current_detector, current_model_name
+
+     try:
+         if model_name != current_model_name:
+             print(f"Loading model: {model_name}")
+             model_path = models[model_name]
+             current_detector = SimpleAnomalyDetector(model_path)
+             current_model_name = model_name
+             return f"✅ Model {model_name} loaded!"
+         return f"✅ Model {model_name} already loaded"
+     except Exception as e:
+         return f"❌ Error loading {model_name}: {str(e)}"
+
+
+ def get_embedding_and_stats(image):
+     """Get embedding from autoencoder and calculate statistics"""
+     try:
+         from torchvision import transforms
+         import config
+
+         # Get image size
+         if isinstance(config.IMAGE_SIZE, tuple):
+             target_size = config.IMAGE_SIZE
+         else:
+             target_size = (config.IMAGE_SIZE, config.IMAGE_SIZE)
+
+         # Preprocess
+         image_pil = image.convert('RGB').resize(target_size, Image.LANCZOS)
+         transform = transforms.Compose([
+             transforms.ToTensor(),
+             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+         ])
+         image_tensor = transform(image_pil).unsqueeze(0).to(current_detector.device)
+
+         # Get embedding (latent representation)
+         with torch.no_grad():
+             _, embedding = current_detector.model(image_tensor)
+
+         # Convert to numpy for analysis
+         embedding_np = embedding.squeeze(0).cpu().numpy().flatten()
+
+         # Calculate statistics
+         stats_dict = {
+             'mean': float(np.mean(embedding_np)),
+             'median': float(np.median(embedding_np)),
+             'std': float(np.std(embedding_np)),
+             'min': float(np.min(embedding_np)),
+             'max': float(np.max(embedding_np)),
+             'q25': float(np.percentile(embedding_np, 25)),
+             'q75': float(np.percentile(embedding_np, 75)),
+             'skewness': float(stats.skew(embedding_np)),
+             'kurtosis': float(stats.kurtosis(embedding_np)),
+             'variance': float(np.var(embedding_np)),
+             'range': float(np.max(embedding_np) - np.min(embedding_np)),
+             'iqr': float(np.percentile(embedding_np, 75) - np.percentile(embedding_np, 25))
+         }
+
+         # Create visualization
+         fig, axes = plt.subplots(2, 2, figsize=(12, 10))
+         fig.suptitle(f'Embedding Analysis (Dimension: {len(embedding_np)})', fontsize=16)
+
+         # Histogram
+         axes[0, 0].hist(embedding_np, bins=50, alpha=0.7, color='skyblue', edgecolor='black')
+         axes[0, 0].set_title('Distribution Histogram')
+         axes[0, 0].set_xlabel('Embedding Values')
+         axes[0, 0].set_ylabel('Frequency')
+         axes[0, 0].grid(True, alpha=0.3)
+
+         # Box plot
+         axes[0, 1].boxplot(embedding_np, vert=True)
+         axes[0, 1].set_title('Box Plot')
+         axes[0, 1].set_ylabel('Embedding Values')
+         axes[0, 1].grid(True, alpha=0.3)
+
+         # Q-Q plot (normal distribution)
+         stats.probplot(embedding_np, dist="norm", plot=axes[1, 0])
+         axes[1, 0].set_title('Q-Q Plot (Normal Distribution)')
+         axes[1, 0].grid(True, alpha=0.3)
+
+         # Embedding values plot
+         axes[1, 1].plot(embedding_np, alpha=0.7, color='red', linewidth=1)
+         axes[1, 1].set_title('Embedding Values Sequence')
+         axes[1, 1].set_xlabel('Dimension Index')
+         axes[1, 1].set_ylabel('Value')
+         axes[1, 1].grid(True, alpha=0.3)
+
+         plt.tight_layout()
+
+         # Convert plot to image
+         buf = io.BytesIO()
+         plt.savefig(buf, format='png', dpi=100, bbox_inches='tight')
+         buf.seek(0)
+         plot_image = Image.open(buf)
+         plt.close()
+
+         return embedding_np, stats_dict, plot_image
+
+     except Exception as e:
+         print(f"Error in embedding analysis: {e}")
+         return None, {}, None
+
+
+ def format_stats_text(stats_dict):
+     """Format statistics into readable text"""
+     if not stats_dict:
+         return "❌ Error calculating statistics"
+
+     text = f"""📊 EMBEDDING STATISTICS
+
+ 🎯 Central Tendency:
+    Mean: {stats_dict['mean']:.6f}
+    Median: {stats_dict['median']:.6f}
+
+ 📏 Spread:
+    Std Dev: {stats_dict['std']:.6f}
+    Variance: {stats_dict['variance']:.6f}
+    Range: {stats_dict['range']:.6f}
+    IQR: {stats_dict['iqr']:.6f}
+
+ 📈 Extremes:
+    Min: {stats_dict['min']:.6f}
+    Max: {stats_dict['max']:.6f}
+    Q25: {stats_dict['q25']:.6f}
+    Q75: {stats_dict['q75']:.6f}
+
+ 🔄 Shape:
+    Skewness: {stats_dict['skewness']:.6f}
+    Kurtosis: {stats_dict['kurtosis']:.6f}
+
+ """
+
+     return text
+
+
+ def classify_image(reconstruction_error, threshold):
+     """Classify image as corrupted or clean based on threshold"""
+     is_corrupted = reconstruction_error > threshold
+     confidence = abs(reconstruction_error - threshold) / threshold * 100
+
+     if is_corrupted:
+         classification = "🚨 CORRUPTED/ANOMALOUS"
+         color_indicator = "🔴"
+         explanation = f"Reconstruction error ({reconstruction_error:.6f}) > Threshold ({threshold:.6f})"
+     else:
+         classification = "✅ CLEAN/NORMAL"
+         color_indicator = "🟢"
+         explanation = f"Reconstruction error ({reconstruction_error:.6f}) ≤ Threshold ({threshold:.6f})"
+
+     # Calculate how far from threshold (as percentage)
+     distance_pct = (reconstruction_error - threshold) / threshold * 100
+
+     classification_text = f"""🎯 ANOMALY CLASSIFICATION
+
+ {color_indicator} Status: {classification}
+
+ 📊 Details:
+    Reconstruction Error: {reconstruction_error:.6f}
+    Threshold: {threshold:.6f}
+    Distance from Threshold: {distance_pct:+.2f}%
+
+ 📝 Explanation:
+    {explanation}
+
+ 💡 Confidence Indicator:
+    • Distance > 50%: High confidence
+    • Distance 10-50%: Medium confidence
+    • Distance < 10%: Low confidence (near threshold)
+
+ 🎚️ Current Distance: {abs(distance_pct):.2f}% ({'High' if abs(distance_pct) > 50 else 'Medium' if abs(distance_pct) > 10 else 'Low'} confidence)"""
+
+     return classification_text, is_corrupted
+
+
+ def process_image(model_name, image, corruption_type, intensity, threshold):
+     """Main processing function"""
+     try:
+         # Load model
+         load_status = load_model(model_name)
+         if "❌" in load_status:
+             return None, None, load_status, 0.0, "", ""
+
+         if image is None:
+             return None, None, "❌ Please upload an image", 0.0, "", ""
+
+         # Apply corruption
+         if corruption_type == "none":
+             corrupted_image = image.copy()
+             corruption_info = "No corruption applied"
+         else:
+             corrupted_image = corrupt_image(image, corruption_type, intensity)
+             corruption_info = f"Applied {corruption_type} corruption (intensity: {intensity})"
+
+         # Calculate reconstruction error
+         error = current_detector.calculate_reconstruction_error(corrupted_image)
+
+         # Get embedding and statistics
+         embedding, stats_dict, plot_image = get_embedding_and_stats(corrupted_image)
+
+         # Format statistics text
+         stats_text = format_stats_text(stats_dict)
+
+         # Classify image based on threshold
+         classification_text, is_corrupted = classify_image(error, threshold)
+
+         # Status message
+         status = f"""✅ Processing complete!
+ 📊 Model: {model_name}
+ 🔧 {corruption_info}
+ 📈 Reconstruction Error: {error:.6f}
+ 🎚️ Threshold: {threshold:.6f}
+ 🎯 Classification: {'CORRUPTED' if is_corrupted else 'CLEAN'}
+ 🧠 Embedding Dimension: {len(embedding) if embedding is not None else 'N/A'}
+ 💡 Higher error = more anomalous"""
+
+         return corrupted_image, plot_image, status, error, stats_text, classification_text
+
+     except Exception as e:
+         error_msg = f"❌ Error: {str(e)}"
+         return None, None, error_msg, 0.0, "", ""
+
+
+ # Create interface
+ def create_interface():
+     with gr.Blocks(title="Anomaly Detection Tester") as demo:
+         gr.Markdown("# 🔍 Federated Autoencoder for Kidney Stone Image Corruption Detection")
+         gr.Markdown("Upload an image, analyze its latent representation, and classify it as corrupted or clean using a threshold.")
+
+         with gr.Row():
+             with gr.Column(scale=1):
+                 gr.Markdown("### ⚙️ Model & Corruption Settings")
+
+                 model_dropdown = gr.Dropdown(
+                     choices=list(models.keys()),
+                     value="Daudon_MIX",
+                     label="🤖 Select Model"
+                 )
+
+                 corruption_dropdown = gr.Dropdown(
+                     choices=["none", "noise", "blur", "brightness", "contrast", "saturation", "random"],
+                     value="none",
+                     label="🔧 Corruption Type"
+                 )
+
+                 intensity_slider = gr.Slider(
+                     minimum=0.1,
+                     maximum=3.0,
+                     value=1.0,
+                     step=0.1,
+                     label="💪 Corruption Intensity"
+                 )
+
+                 gr.Markdown("### 🎚️ Classification Settings")
+
+                 threshold_slider = gr.Slider(
+                     minimum=0.1,
+                     maximum=3.0,
+                     value=1.0,
+                     step=0.1,
+                     label="🎯 Anomaly Threshold (Reconstruction Error)"
+                 )
+
+                 gr.Markdown("### 📸 Image Input")
+
+                 image_input = gr.Image(type="pil", label="Upload Image")
+
+                 # Add examples section
+                 gr.Markdown("### 📁 Example Images")
+
+                 # You can specify your example image paths here
+                 example_images = [
+                     ["example_imgs/TypeIa_LaosN°15_Image21-25.png", "Clean Daudon MIX-Subtype_Ia"],
+                     ["example_imgs/72222-SectionIVa+WK maj_0009-60.png", "Clean Daudon MIX-Subtype_IVa"],
+                     ["example_imgs/TypeIVa2_N47583_Notteb-11.png", "Clean Daudon MIX-Subtype_IVa2"],
+                     ["example_imgs/typIVc_IVbsectbis-43.png", "Clean Daudon MIX-Subtype_IVc"],
+                     ["example_imgs/TypeIVd_Sect_LC3373-65.png", "Clean Daudon MIX-Subtype_IVd"],
+                     ["example_imgs/Section_Va_72845-3-18.png", "Clean Daudon MIX-Subtype_Va"],
+                 ]
+
+                 examples_component = gr.Examples(
+                     examples=example_images,
+                     inputs=image_input,
+                     label="Daudon MIX Example Clean Images",
+                     examples_per_page=6,
+                     cache_examples=False
+                 )
+
+                 process_btn = gr.Button("🚀 Analyze & Classify", variant="primary", size="lg")
+
+             with gr.Column(scale=1):
+                 gr.Markdown("### 📊 Results")
+
+                 status_output = gr.Textbox(label="📋 Status", lines=8)
+                 error_output = gr.Number(label="📈 Reconstruction Error", precision=6)
+
+                 corrupted_output = gr.Image(label="🔧 Input Image (Corrupted)")
+
+
+         with gr.Row():
+             embedding_plot = gr.Image(label="🧠 Embedding Analysis")
+
+         with gr.Row():
+             stats_output = gr.Textbox(label="📊 Embedding Statistics", lines=20)
+             classification_output = gr.Textbox(label="🎯 Classification Result", lines=15)
+
+         # Connect the button
+         process_btn.click(
+             fn=process_image,
+             inputs=[model_dropdown, image_input, corruption_dropdown, intensity_slider, threshold_slider],
+             outputs=[corrupted_output, embedding_plot, status_output, error_output, stats_output, classification_output]
+         )
+
+
+     return demo
+
+
+ if __name__ == "__main__":
+     print("🚀 Starting Embedding Analysis App...")
+
+     demo = create_interface()
+     demo.launch(
+         server_name="127.0.0.1",
+         server_port=7860,
+         share=False,
+         debug=False,
+         show_error=True
+     )
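One caveat worth flagging: the app imports corrupt_image from image_corruption_utils, which is not among the files in this upload, so launching it assumes that module is already present in the repo. The threshold logic itself is a pure function and can be exercised directly; a small sketch, assuming the module's imports resolve:

from simple_gradio_app import classify_image

# An error 20% above the threshold is flagged as corrupted (medium confidence)
text, is_corrupted = classify_image(reconstruction_error=1.2, threshold=1.0)
print(is_corrupted)  # True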
utils/__init__.py ADDED
@@ -0,0 +1,13 @@
+ from .data_utils import (
+     create_client_dataloaders,
+     create_global_test_loader,
+     ImageCorruption,
+     KidneyStoneDataset
+ )
+
+ __all__ = [
+     'create_client_dataloaders',
+     'create_global_test_loader',
+     'ImageCorruption',
+     'KidneyStoneDataset'
+ ]
utils/data_utils.py ADDED
@@ -0,0 +1,416 @@
+ """
+ Data utilities for federated autoencoder training
+ """
+
+ import os
+ import random
+ import numpy as np
+ from PIL import Image, ImageFilter, ImageEnhance
+ import torch
+ from torch.utils.data import Dataset, DataLoader
+ from torchvision import transforms
+ from sklearn.model_selection import train_test_split
+ from collections import Counter
+ import config
+
+
+ class ImageCorruption:
+     """Class to handle various image corruptions"""
+
+     def __init__(self, corruption_prob=0.1):
+         self.corruption_prob = corruption_prob
+
+     def gaussian_noise(self, image):
+         """Add Gaussian noise to image"""
+         if random.random() < self.corruption_prob:
+             noise = torch.randn_like(image) * 0.1
+             image = torch.clamp(image + noise, 0, 1)
+         return image
+
+     def salt_pepper_noise(self, image):
+         """Add salt and pepper noise"""
+         if random.random() < self.corruption_prob:
+             noise = torch.rand_like(image)
+             salt = noise > 0.95
+             pepper = noise < 0.05
+             image[salt] = 1.0
+             image[pepper] = 0.0
+         return image
+
+     def blur(self, image):
+         """Apply blur to image"""
+         if random.random() < self.corruption_prob:
+             # Convert to PIL for blur operation
+             if isinstance(image, torch.Tensor):
+                 image_pil = transforms.ToPILImage()(image)
+                 image_pil = image_pil.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.5, 2.0)))
+                 image = transforms.ToTensor()(image_pil)
+         return image
+
+     def brightness_change(self, image):
+         """Change brightness of image"""
+         if random.random() < self.corruption_prob:
+             factor = random.uniform(0.5, 1.5)
+             image = torch.clamp(image * factor, 0, 1)
+         return image
+
+     def contrast_change(self, image):
+         """Change contrast of image"""
+         if random.random() < self.corruption_prob:
+             mean = image.mean()
+             factor = random.uniform(0.5, 1.5)
+             image = torch.clamp((image - mean) * factor + mean, 0, 1)
+         return image
+
+     def apply_random_corruption(self, image):
+         """Apply a random corruption to the image"""
+         corruptions = [
+             self.gaussian_noise,
+             self.salt_pepper_noise,
+             self.blur,
+             self.brightness_change,
+             self.contrast_change
+         ]
+
+         corruption_func = random.choice(corruptions)
+         return corruption_func(image)
+
+
+ class KidneyStoneDataset(Dataset):
+     """Custom dataset for kidney stone images"""
+
+     def __init__(self, image_paths, labels, transform=None, corruption_prob=0.0):
+         self.image_paths = image_paths
+         self.labels = labels
+         self.transform = transform
+         self.corruption = ImageCorruption(corruption_prob)
+
+     def __len__(self):
+         return len(self.image_paths)
+
+     def __getitem__(self, idx):
+         image_path = self.image_paths[idx]
+         label = self.labels[idx]
+
+         # Load image
+         image = Image.open(image_path).convert('RGB')
+
+         if self.transform:
+             image = self.transform(image)
+
+         # Apply corruption if specified
+         if self.corruption.corruption_prob > 0:
+             image = self.corruption.apply_random_corruption(image)
+
+         return image, label
+
+
+ def load_dataset_paths(datasets=None, subversions=None):
+     """Load image paths and labels from specified datasets and subversions"""
+     all_paths = []
+     all_labels = []
+
+     # Use all datasets if none specified
+     if datasets is None:
+         datasets = config.DATASETS
+
+     # Use all subversions if none specified
+     if subversions is None:
+         subversions = config.SUBVERSIONS
+
+     for dataset_name in datasets:
+         dataset_path = os.path.join(config.DATA_ROOT, dataset_name)
+
+         if not os.path.exists(dataset_path):
+             print(f"Warning: Dataset path does not exist: {dataset_path}")
+             continue
+
+         for subversion in subversions:
+             subversion_path = os.path.join(dataset_path, subversion)
+
+             if not os.path.exists(subversion_path):
+                 print(f"Warning: Subversion path does not exist: {subversion_path}")
+                 continue
+
+             # Load training images (extract class from folder structure)
+             train_path = os.path.join(subversion_path, "train")
+             if os.path.exists(train_path):
+                 # Get all class folders in train directory
+                 class_folders = [d for d in os.listdir(train_path)
+                                  if os.path.isdir(os.path.join(train_path, d))]
+
+                 for class_folder in class_folders:
+                     class_path = os.path.join(train_path, class_folder)
+
+                     # Load all images in this class folder
+                     for img_file in os.listdir(class_path):
+                         if img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
+                             img_path = os.path.join(class_path, img_file)
+                             all_paths.append(img_path)
+                             # Create label with class information: "subversion_class"
+                             all_labels.append(f"{subversion}_{class_folder}")
+
+             # Load test images (extract class from folder structure)
+             test_path = os.path.join(subversion_path, "test")
+             if os.path.exists(test_path):
+                 # Get all class folders in test directory
+                 class_folders = [d for d in os.listdir(test_path)
+                                  if os.path.isdir(os.path.join(test_path, d))]
+
+                 for class_folder in class_folders:
+                     class_path = os.path.join(test_path, class_folder)
+
+                     # Load all images in this class folder
+                     for img_file in os.listdir(class_path):
+                         if img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
+                             img_path = os.path.join(class_path, img_file)
+                             all_paths.append(img_path)
+                             # Create label with class information: "subversion_class"
+                             all_labels.append(f"{subversion}_{class_folder}")
+
+     print(f"📊 Data loading summary:")
+     print(f"   Total images: {len(all_paths)}")
+     print(f"   Unique classes found: {len(set(all_labels))}")
+     print(f"   Classes: {sorted(set(all_labels))}")
+
+     return all_paths, all_labels
+
+
+ def redistribute_data_evenly(image_paths, labels, num_clients):
+     """Redistribute data evenly among clients as fallback"""
+     total_samples = len(image_paths)
+     samples_per_client = total_samples // num_clients
+
+     # Shuffle data
+     combined = list(zip(image_paths, labels))
+     np.random.shuffle(combined)
+
+     client_datasets = []
+     for i in range(num_clients):
+         start_idx = i * samples_per_client
+         if i == num_clients - 1:  # Last client gets remaining samples
+             end_idx = total_samples
+         else:
+             end_idx = (i + 1) * samples_per_client
+
+         client_data = combined[start_idx:end_idx]
+         if client_data:
+             client_paths, client_labels = zip(*client_data)
+             client_datasets.append((list(client_paths), list(client_labels)))
+             print(f"Client {i} redistributed with {len(client_paths)} samples")
+
+     return client_datasets
+
+
+ def create_non_iid_distribution(image_paths, labels, num_clients, alpha=0.5):
+     """Create non-IID data distribution using Dirichlet distribution"""
+
+     # Convert labels to numeric
+     unique_labels = list(set(labels))
+     label_to_idx = {label: idx for idx, label in enumerate(unique_labels)}
+     numeric_labels = [label_to_idx[label] for label in labels]
+
+     num_classes = len(unique_labels)
+
+     # Create Dirichlet distribution for each client
+     client_distributions = np.random.dirichlet([alpha] * num_classes, num_clients)
+
+     # Group data by class
+     class_indices = {i: [] for i in range(num_classes)}
+     for idx, label in enumerate(numeric_labels):
+         class_indices[label].append(idx)
+
+     # Distribute data to clients
+     client_data = [[] for _ in range(num_clients)]
+
+     for class_idx in range(num_classes):
+         class_data = class_indices[class_idx]
+         np.random.shuffle(class_data)
+
+         # Calculate how many samples each client gets from this class
+         total_samples = len(class_data)
+         client_samples = (client_distributions[:, class_idx] * total_samples).astype(int)
+
+         # Ensure we don't exceed total samples
+         if client_samples.sum() > total_samples:
+             excess = client_samples.sum() - total_samples
+             client_samples[-1] -= excess
+
+         # Distribute samples
+         start_idx = 0
+         for client_idx, num_samples in enumerate(client_samples):
+             if num_samples > 0:
+                 end_idx = start_idx + num_samples
+                 client_data[client_idx].extend(class_data[start_idx:end_idx])
+                 start_idx = end_idx
+
+     # Convert indices back to paths and labels
+     client_datasets = []
+     for client_idx, client_indices in enumerate(client_data):
+         if len(client_indices) > 0:  # Accept any client with at least some data
+             client_paths = [image_paths[i] for i in client_indices]
+             client_labels = [labels[i] for i in client_indices]
+             client_datasets.append((client_paths, client_labels))
+             print(f"Client {client_idx} will have {len(client_indices)} samples")
+         else:
+             print(f"Warning: Client {client_idx} has no samples assigned")
+
+     # If we don't have enough clients, redistribute the data more evenly
+     if len(client_datasets) < num_clients:
+         print(f"Warning: Only {len(client_datasets)} clients have sufficient data. Redistributing...")
+         return redistribute_data_evenly(image_paths, labels, num_clients)
+
+     return client_datasets
+
+
+ def safe_train_test_split(paths, labels, test_size=0.2, random_state=None):
+     """
+     Safely split data into train/test, handling classes with insufficient samples
+     """
+     # Count samples per class
+     class_counts = Counter(labels)
+
+     # Check if we can do stratified split
+     min_class_size = min(class_counts.values())
+     can_stratify = min_class_size >= 2
+
+     if can_stratify:
+         try:
+             return train_test_split(
+                 paths, labels,
+                 test_size=test_size,
+                 random_state=random_state,
+                 stratify=labels
+             )
+         except ValueError as e:
+             print(f"   ⚠️ Stratified split failed: {e}")
+             can_stratify = False
+
+     if not can_stratify:
+         print(f"   📊 Using random split (some classes have <2 samples)")
+         print(f"   📈 Class distribution: {dict(class_counts)}")
+
+         # Use random split without stratification
+         return train_test_split(
+             paths, labels,
+             test_size=test_size,
+             random_state=random_state,
+             stratify=None
+         )
+
+
+ def get_data_transforms():
+     """Get data transformations for training and testing"""
+
+     train_transform = transforms.Compose([
+         transforms.Resize(config.IMAGE_SIZE),
+         transforms.RandomHorizontalFlip(p=0.5),
+         transforms.RandomRotation(10),
+         transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),
+         transforms.ToTensor(),
+         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+     ])
+
+     test_transform = transforms.Compose([
+         transforms.Resize(config.IMAGE_SIZE),
+         transforms.ToTensor(),
+         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+     ])
+
+     return train_transform, test_transform
+
+
+ def create_client_dataloaders(num_clients, corruption_prob=0.1, alpha=0.5, datasets=None, subversions=None):
+     """Create data loaders for all clients with non-IID distribution"""
+
+     # Load data from specified datasets and subversions
+     all_paths, all_labels = load_dataset_paths(datasets=datasets, subversions=subversions)
+
+     print(f"Total images loaded: {len(all_paths)}")
+     print(f"Unique labels: {set(all_labels)}")
+
+     if len(all_paths) == 0:
+         raise ValueError("No images found! Please check your dataset paths and subversions.")
+
+     # Create non-IID distribution
+     client_datasets = create_non_iid_distribution(all_paths, all_labels, num_clients, alpha)
+
+     print(f"Created {len(client_datasets)} client datasets")
+
+     # Get transforms
+     train_transform, test_transform = get_data_transforms()
+
+     # Create data loaders for each client
+     client_loaders = []
+
+     for i, (client_paths, client_labels) in enumerate(client_datasets):
+         print(f"Client {i}: {len(client_paths)} samples")
+
+         # Split into train/test for each client using safe splitting
+         train_paths, test_paths, train_labels, test_labels = safe_train_test_split(
+             client_paths, client_labels, test_size=0.2, random_state=config.SEED
+         )
+
+         # Create datasets
+         train_dataset = KidneyStoneDataset(
+             train_paths, train_labels,
+             transform=train_transform,
+             corruption_prob=corruption_prob
+         )
+
+         test_dataset = KidneyStoneDataset(
+             test_paths, test_labels,
+             transform=test_transform,
+             corruption_prob=0.0  # No corruption for test data
+         )
+
+         # Create data loaders
+         train_loader = DataLoader(
+             train_dataset,
+             batch_size=config.BATCH_SIZE,
+             shuffle=True,
+             num_workers=2
+         )
+
+         test_loader = DataLoader(
+             test_dataset,
+             batch_size=config.BATCH_SIZE,
+             shuffle=False,
+             num_workers=2
+         )
+
+         client_loaders.append((train_loader, test_loader))
+
+     return client_loaders
+
+
+ def create_global_test_loader(datasets=None, subversions=None):
+     """Create a global test loader for evaluation"""
+
+     # Load data from specified datasets and subversions
+     all_paths, all_labels = load_dataset_paths(datasets=datasets, subversions=subversions)
+
+     if len(all_paths) == 0:
+         raise ValueError("No images found for global test loader! Please check your dataset paths and subversions.")
+
+     # Use a subset for global testing with safe splitting
+     _, test_paths, _, test_labels = safe_train_test_split(
+         all_paths, all_labels, test_size=0.1, random_state=config.SEED
+     )
+
+     _, test_transform = get_data_transforms()
+
+     test_dataset = KidneyStoneDataset(
+         test_paths, test_labels,
+         transform=test_transform,
+         corruption_prob=0.0
+     )
+
+     test_loader = DataLoader(
+         test_dataset,
+         batch_size=config.BATCH_SIZE,
+         shuffle=False,
+         num_workers=2
+     )
+
+     return test_loader
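A sketch of how these loaders are typically consumed. Hedged: it assumes config defines DATA_ROOT, DATASETS, SUBVERSIONS, IMAGE_SIZE, BATCH_SIZE, and SEED (config.py is not part of this upload) and that the datasets exist on disk. Smaller alpha values make the Dirichlet partition more skewed, i.e. more strongly non-IID.

from utils.data_utils import create_client_dataloaders, create_global_test_loader

# Five federated clients; 10% of each client's training images randomly corrupted
client_loaders = create_client_dataloaders(num_clients=5, corruption_prob=0.1, alpha=0.5)
for i, (train_loader, test_loader) in enumerate(client_loaders):
    print(f"Client {i}: {len(train_loader.dataset)} train / {len(test_loader.dataset)} test")

# Shared held-out loader for server-side evaluation
global_test_loader = create_global_test_loader(subversions=["MIX"])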
utils/metrics.py ADDED
@@ -0,0 +1,316 @@
+ """
+ Metrics utilities for federated autoencoder evaluation
+ """
+
+ import torch
+ import numpy as np
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
+ from sklearn.metrics import classification_report, silhouette_score
+ from sklearn.cluster import KMeans
+ from sklearn.preprocessing import LabelEncoder
+ import torch.nn.functional as F
+
+
+ def calculate_reconstruction_metrics(model, data_loader, device):
+     """
+     Calculate reconstruction-based metrics for autoencoder
+
+     Args:
+         model: Trained autoencoder model
+         data_loader: DataLoader with test data
+         device: Device to run evaluation on
+
+     Returns:
+         dict: Dictionary containing reconstruction metrics
+     """
+     model.eval()
+
+     reconstruction_errors = []
+     total_loss = 0.0
+     num_samples = 0
+
+     criterion = torch.nn.MSELoss(reduction='none')
+
+     with torch.no_grad():
+         for data, labels in data_loader:
+             data = data.to(device)
+
+             # Forward pass
+             reconstructed, latent = model(data)
+
+             # Calculate per-sample reconstruction error
+             batch_errors = criterion(reconstructed, data).view(data.size(0), -1).mean(dim=1)
+             reconstruction_errors.extend(batch_errors.cpu().numpy())
+
+             # Calculate total loss
+             total_loss += F.mse_loss(reconstructed, data).item() * data.size(0)
+             num_samples += data.size(0)
+
+     reconstruction_errors = np.array(reconstruction_errors)
+
+     # Calculate reconstruction statistics
+     avg_loss = total_loss / num_samples
+     avg_reconstruction_error = np.mean(reconstruction_errors)
+     std_reconstruction_error = np.std(reconstruction_errors)
+     median_reconstruction_error = np.median(reconstruction_errors)
+
+     # Calculate reconstruction quality metrics (lower is better)
+     # Use percentiles to define "good" vs "poor" reconstruction
+     percentile_25 = np.percentile(reconstruction_errors, 25)
+     percentile_75 = np.percentile(reconstruction_errors, 75)
+
+     # Define good reconstruction as bottom 25% of errors
+     good_reconstruction = reconstruction_errors <= percentile_25
+     poor_reconstruction = reconstruction_errors >= percentile_75
+
+     # For autoencoder evaluation, we'll use a more meaningful approach:
+     # Compare reconstruction quality across different error thresholds
+
+     # Method 1: Use median as threshold (more stable than mean)
+     median_threshold = np.median(reconstruction_errors)
+     better_than_median = (reconstruction_errors <= median_threshold).astype(int)
+
+     # Method 2: Use a stricter threshold (25th percentile) for "good" reconstructions
+     strict_threshold = np.percentile(reconstruction_errors, 25)
+     high_quality = (reconstruction_errors <= strict_threshold).astype(int)
+
+     # Calculate "precision" as: how many predicted good are actually good
+     # This is more like "consistency" - if we predict good, how often is it actually good?
+
+     # For binary classification metrics, we need to define what we're classifying
+     # Let's classify: "Is this reconstruction better than average?"
+
+     # Ground truth: better than median (50% of samples)
+     true_better_than_median = better_than_median
+
+     # Prediction: better than 40th percentile (slightly more lenient)
+     prediction_threshold = np.percentile(reconstruction_errors, 40)
+     predicted_better = (reconstruction_errors <= prediction_threshold).astype(int)
+
+     # Calculate metrics - but note these are somewhat artificial for autoencoders
+     accuracy = accuracy_score(true_better_than_median, predicted_better)
+     precision = precision_score(true_better_than_median, predicted_better, average='binary', zero_division=0)
+     recall = recall_score(true_better_than_median, predicted_better, average='binary', zero_division=0)
+     f1 = f1_score(true_better_than_median, predicted_better, average='binary', zero_division=0)
+
+     # Add a note about what these metrics mean
+     classification_note = (
+         "Note: Classification metrics compare 40th vs 50th percentile thresholds. "
+         "Perfect scores may indicate threshold alignment rather than model quality."
+     )
+
+     return {
+         'loss': avg_loss,
+         'reconstruction_error': avg_reconstruction_error,
+         'reconstruction_std': std_reconstruction_error,
+         'reconstruction_median': median_reconstruction_error,
+         'reconstruction_25th': percentile_25,
+         'reconstruction_75th': percentile_75,
+         'accuracy': accuracy,
+         'precision': precision,
+         'recall': recall,
+         'f1_score': f1,
+         'num_samples': num_samples,
+         'good_reconstructions': np.sum(good_reconstruction),
+         'poor_reconstructions': np.sum(poor_reconstruction),
+         'classification_note': classification_note,
+         'better_than_median_count': np.sum(better_than_median),
+         'high_quality_count': np.sum(high_quality)
+     }
+
+
+ def calculate_latent_classification_metrics(model, data_loader, device):
+     """
+     Calculate classification metrics using latent space representations
+
+     Args:
+         model: Trained autoencoder model
+         data_loader: DataLoader with test data
+         device: Device to run evaluation on
+
+     Returns:
+         dict: Dictionary containing latent space metrics
+     """
+     model.eval()
+
+     latent_features = []
+     true_labels = []
+
+     with torch.no_grad():
+         for data, labels in data_loader:
+             data = data.to(device)
+
+             # Get latent representations
+             _, latent = model(data)
+             latent_features.append(latent.cpu().numpy())
+             true_labels.extend(labels)
+
+     # Combine all latent features
+     latent_features = np.vstack(latent_features)
+
+     # Encode string labels to numeric
+     label_encoder = LabelEncoder()
+     numeric_labels = label_encoder.fit_transform(true_labels)
+     unique_labels = np.unique(numeric_labels)
+     n_classes = len(unique_labels)
+
+     print(f"   🔍 Latent analysis: {n_classes} unique classes found")
+     print(f"   📊 Class distribution: {dict(zip(label_encoder.classes_, np.bincount(numeric_labels)))}")
+
+     if n_classes == 1:
+         # Single class case - no meaningful classification possible
+         return {
+             'latent_accuracy': 0.0,  # No classification possible
+             'latent_precision': 0.0,
+             'latent_recall': 0.0,
+             'latent_f1_score': 0.0,
+             'silhouette_score': 0.0,  # No clustering possible
+             'n_clusters': n_classes,
+             'latent_dim': latent_features.shape[1],
+             'cluster_quality': 'single_class'
+         }
+
+     # Perform clustering
+     try:
+         # Use the actual number of classes for clustering
+         kmeans = KMeans(n_clusters=n_classes, random_state=42, n_init=10)
+         cluster_predictions = kmeans.fit_predict(latent_features)
+
+         # Calculate silhouette score for cluster quality
+         if n_classes > 1 and len(set(cluster_predictions)) > 1:
+             silhouette = silhouette_score(latent_features, cluster_predictions)
+         else:
+             silhouette = 0.0
+
+         # For meaningful classification metrics, we need to align cluster labels with true labels
+         # This is a complex problem, so we'll use a simpler approach:
+         # Calculate how well the clustering separates the true classes
+
+         # Method 1: Direct comparison (may not be meaningful due to label permutation)
+         accuracy_direct = accuracy_score(numeric_labels, cluster_predictions)
+
+         # Method 2: Best possible alignment between clusters and true labels
+         try:
+             from scipy.optimize import linear_sum_assignment
+             from sklearn.metrics import confusion_matrix
+
+             # Create confusion matrix
+             cm = confusion_matrix(numeric_labels, cluster_predictions)
+
+             # Find best assignment using Hungarian algorithm
+             if cm.shape[0] == cm.shape[1]:  # Same number of clusters and classes
+                 row_ind, col_ind = linear_sum_assignment(-cm)  # Negative for maximization
+                 aligned_predictions = np.zeros_like(cluster_predictions)
+                 for i, j in zip(row_ind, col_ind):
+                     aligned_predictions[cluster_predictions == j] = i
+
+                 # Calculate metrics with aligned labels
+                 accuracy = accuracy_score(numeric_labels, aligned_predictions)
+                 precision = precision_score(numeric_labels, aligned_predictions, average='weighted', zero_division=0)
+                 recall = recall_score(numeric_labels, aligned_predictions, average='weighted', zero_division=0)
+                 f1 = f1_score(numeric_labels, aligned_predictions, average='weighted', zero_division=0)
+                 cluster_quality = 'aligned'
+             else:
+                 # Different number of clusters and classes - use direct comparison
+                 accuracy = accuracy_direct
+                 precision = precision_score(numeric_labels, cluster_predictions, average='weighted', zero_division=0)
+                 recall = recall_score(numeric_labels, cluster_predictions, average='weighted', zero_division=0)
+                 f1 = f1_score(numeric_labels, cluster_predictions, average='weighted', zero_division=0)
+                 cluster_quality = 'unaligned'
+         except ImportError:
+             print(f"   ⚠️ scipy not available, using direct comparison")
+             # Fallback to direct comparison without alignment
+             accuracy = accuracy_direct
+             precision = precision_score(numeric_labels, cluster_predictions, average='weighted', zero_division=0)
+             recall = recall_score(numeric_labels, cluster_predictions, average='weighted', zero_division=0)
+             f1 = f1_score(numeric_labels, cluster_predictions, average='weighted', zero_division=0)
+             cluster_quality = 'direct'
+
+     except Exception as e:
+         print(f"   ⚠️ Clustering failed: {e}")
+         accuracy = precision = recall = f1 = silhouette = 0.0
+         cluster_quality = 'failed'
+
+     return {
+         'latent_accuracy': accuracy,
+         'latent_precision': precision,
+         'latent_recall': recall,
+         'latent_f1_score': f1,
+         'silhouette_score': silhouette,
+         'n_clusters': n_classes,
+         'latent_dim': latent_features.shape[1],
+         'cluster_quality': cluster_quality
+     }
+
+
+ def calculate_comprehensive_metrics(model, data_loader, device):
+     """
+     Calculate comprehensive metrics for autoencoder evaluation
+
+     Args:
+         model: Trained autoencoder model
+         data_loader: DataLoader with test data
+         device: Device to run evaluation on
+
+     Returns:
+         dict: Dictionary containing all metrics
+     """
+     print(f"   🔄 Calculating reconstruction metrics...")
+     recon_metrics = calculate_reconstruction_metrics(model, data_loader, device)
+
+     print(f"   🧠 Calculating latent space metrics...")
+     latent_metrics = calculate_latent_classification_metrics(model, data_loader, device)
+
+     # Combine all metrics
+     comprehensive_metrics = {
+         **recon_metrics,
+         **latent_metrics
+     }
+
+     return comprehensive_metrics
+
+
+ def print_metrics_summary(metrics, subversion_name):
+     """Print a formatted summary of metrics"""
+     print(f"\n📊 Metrics Summary for {subversion_name}:")
+     print("=" * 60)
+
+     # Reconstruction metrics
+     print(f"🔄 Reconstruction Loss: {metrics['loss']:.6f}")
+     print(f"📏 Reconstruction Error: {metrics['reconstruction_error']:.6f} ± {metrics['reconstruction_std']:.6f}")
+     print(f"📊 Reconstruction Median: {metrics['reconstruction_median']:.6f}")
+     print(f"📈 25th/75th Percentile: {metrics['reconstruction_25th']:.6f} / {metrics['reconstruction_75th']:.6f}")
+     print(f"✅ Good Reconstructions: {metrics['good_reconstructions']}/{metrics['num_samples']} ({100*metrics['good_reconstructions']/metrics['num_samples']:.1f}%)")
+     print(f"❌ Poor Reconstructions: {metrics['poor_reconstructions']}/{metrics['num_samples']} ({100*metrics['poor_reconstructions']/metrics['num_samples']:.1f}%)")
+     print(f"🎯 Better than Median: {metrics['better_than_median_count']}/{metrics['num_samples']} ({100*metrics['better_than_median_count']/metrics['num_samples']:.1f}%)")
+     print(f"⭐ High Quality (top 25%): {metrics['high_quality_count']}/{metrics['num_samples']} ({100*metrics['high_quality_count']/metrics['num_samples']:.1f}%)")
+
+     # Classification metrics with explanation
+     print(f"\n🎯 Reconstruction Classification Metrics:")
+     print(f"   Accuracy: {metrics['accuracy']:.4f}")
+     print(f"   Precision: {metrics['precision']:.4f}")
+     print(f"   Recall: {metrics['recall']:.4f}")
+     print(f"   F1-Score: {metrics['f1_score']:.4f}")
+     print(f"   ℹ️ {metrics['classification_note']}")
+
+     # Latent space metrics
+     print(f"\n🧠 Latent Space Analysis:")
+     print(f"   Latent Accuracy: {metrics['latent_accuracy']:.4f}")
+     print(f"   Latent Precision: {metrics['latent_precision']:.4f}")
+     print(f"   Latent Recall: {metrics['latent_recall']:.4f}")
+     print(f"   Latent F1-Score: {metrics['latent_f1_score']:.4f}")
+     print(f"   Silhouette Score: {metrics['silhouette_score']:.4f}")
+     print(f"   Clusters Found: {metrics['n_clusters']}")
+     print(f"   Latent Dimension: {metrics['latent_dim']}")
+     print(f"   Cluster Quality: {metrics['cluster_quality']}")
+
+     # Interpretation guide
+     print(f"\n📖 Interpretation Guide:")
+     print(f"   • Reconstruction Loss: Lower = better image reconstruction")
+     print(f"   • Latent Accuracy: How well clustering separates kidney stone classes")
+     print(f"   • Silhouette Score: Quality of latent space clustering (higher = better)")
+     print(f"   • Perfect precision (1.0) in reconstruction metrics may indicate")
+     print(f"     threshold alignment rather than exceptional model performance")
+
+     print(f"\n📦 Total Samples: {metrics['num_samples']}")
+     print("=" * 60)
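A closing sketch tying the pieces together for offline evaluation, under the same config/models assumptions as above and using a checkpoint from this upload. Note the metrics above expect the model's forward pass to return (reconstruction, latent).

from simple_anomaly_detector import SimpleAnomalyDetector
from utils.data_utils import create_global_test_loader
from utils.metrics import calculate_comprehensive_metrics, print_metrics_summary

detector = SimpleAnomalyDetector("models/Daudon_MIX/best_autoencoder_Daudon_MIX.pth")
test_loader = create_global_test_loader(subversions=["MIX"])

metrics = calculate_comprehensive_metrics(detector.model, test_loader, detector.device)
print_metrics_summary(metrics, "Daudon_MIX")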