Update app.py
app.py CHANGED
@@ -2,6 +2,7 @@
 Enhanced SPG: Multi-Stage Magnitude-Position Guided KV Cache Compression for GPT-Neo 2.7B
 RESEARCH-GRADE: 450x compression with FULL non-negotiables compliance
 NO ESTIMATIONS, NO FALLBACKS, NO HARDCODING - FAIL FAST ON ANY ERROR
+WITH COMPREHENSIVE ABLATION STUDY
 """
 
 import gradio as gr
@@ -38,6 +39,7 @@ import subprocess
 import matplotlib.pyplot as plt
 import matplotlib
 matplotlib.use('Agg')  # Non-interactive backend
+import itertools
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -353,6 +355,41 @@ class EnhancedSPGConfig:
         else:
             return self.kernel_size_xlarge_seq
 
+@dataclass
+class AblationConfig:
+    """Configuration for ablation study - NO HARDCODING."""
+    enabled: bool = True
+    test_stage1_only: bool = True  # Test Stage 1 in isolation
+    test_stage2_only: bool = True  # Test Stage 2 in isolation
+    test_no_head_compression: bool = True  # Disable head compression
+    test_no_adaptive_decomp: bool = True  # Disable adaptive decomposition
+    test_no_hsa: bool = True  # Disable hybrid sparse attention
+    test_no_snapkv: bool = True  # Disable SnapKV++
+    test_conservative_precision: bool = True  # Test with conservative precision
+    test_conservative_magnitude: bool = True  # Test with conservative magnitude threshold
+    test_no_recent_window: bool = True  # Remove recent window protection
+    test_reduced_fp16_heads: bool = True  # Test with fewer FP16 reserved heads
+
+    # Component combinations to test
+    test_combinations: bool = True  # Test various combinations of components
+    combination_configs: List[Dict[str, bool]] = field(default_factory=lambda: [
+        {"stage1": True, "stage2": False, "head_comp": False},  # Stage 1 only
+        {"stage1": False, "stage2": True, "head_comp": False},  # Stage 2 only
+        {"stage1": True, "stage2": True, "head_comp": False},   # Both stages, no head
+        {"stage1": True, "stage2": True, "head_comp": True},    # Full system
+    ])
+
+    # Evaluation parameters
+    eval_samples_per_config: int = 5  # Samples per ablation configuration
+    n_seeds: int = 2  # Seeds for stability
+
+    def __post_init__(self):
+        """Validate ablation parameters."""
+        if self.eval_samples_per_config <= 0:
+            raise ValueError(f"eval_samples_per_config must be positive, got {self.eval_samples_per_config}")
+        if self.n_seeds <= 0:
+            raise ValueError(f"n_seeds must be positive, got {self.n_seeds}")
+
 @dataclass
 class ProvingConfig:
     """Configuration for attestable proof generation and verification - NO HARDCODING."""
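Aside: the flag-style dataclass above is consumed later via `dataclasses.replace`. A minimal, self-contained sketch of that pattern (toy class and field names, not app.py's), showing how per-variant configs are derived from a base config without mutating it:

```python
# Illustrative sketch only -- not part of app.py. Shows how dataclasses.replace
# derives per-ablation variants from a base config, leaving the base untouched.
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class ToyConfig:
    enable_head_compression: bool = True
    recent_window: int = 16

base = ToyConfig()
variants = {
    "full_system": base,
    "no_head_compression": replace(base, enable_head_compression=False),
    "no_recent_window": replace(base, recent_window=0),
}
for name, cfg in variants.items():
    print(name, cfg)  # base is never modified
```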
@@ -387,6 +424,9 @@ class CompressionConfig:
     # Enhanced SPG configuration
     enhanced_spg_config: EnhancedSPGConfig = field(default_factory=EnhancedSPGConfig)
 
+    # Ablation study configuration
+    ablation: AblationConfig = field(default_factory=AblationConfig)
+
     # Proving configuration
     proving: ProvingConfig = field(default_factory=ProvingConfig)
 
@@ -685,7 +725,8 @@ def export_proof_bundle(bundle_dir: str, config: CompressionConfig,
         "strict_flags": {
             "fail_on_cpu_fallback": config.fail_on_cpu_fallback,
             "proving_enabled": config.proving.enabled,
-            "require_cuda": config.proving.require_cuda
+            "require_cuda": config.proving.require_cuda,
+            "ablation_enabled": config.ablation.enabled
         }
     }
 
@@ -1152,6 +1193,148 @@ def plot_compression_tradeoff(summaries_by_ratio: Dict[float, Dict[str, Any]],
     logger.info(f"Compression trade-off plots saved: {plot_path}")
     return plot_path
 
+def plot_ablation_results(ablation_results: Dict[str, Dict[str, Any]], baseline_summary: Dict[str, Any]) -> str:
+    """Generate publication-grade ablation study plots."""
+    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
+
+    # Prepare data
+    configs = list(ablation_results.keys())
+    compression_ratios = [ablation_results[c]['summary']['compression_ratio'] for c in configs]
+    gen_ppls = [ablation_results[c]['summary']['generation_perplexity'] for c in configs]
+    decode_times = [ablation_results[c]['summary']['decode_time_ms'] for c in configs]
+    kv_memories = [ablation_results[c]['summary']['kv_cache_memory_mb'] for c in configs]
+    throughputs = [ablation_results[c]['summary'].get('end_to_end_throughput', 0) for c in configs]
+
+    baseline_gen_ppl = baseline_summary['generation_perplexity']
+    baseline_decode_time = baseline_summary['decode_time_ms']
+    baseline_kv_memory = baseline_summary['kv_cache_memory_mb']
+    baseline_throughput = baseline_summary.get('end_to_end_throughput', 0)
+
+    # 1. Compression Ratio by Component
+    ax1 = axes[0, 0]
+    bars1 = ax1.bar(range(len(configs)), compression_ratios, color='steelblue')
+    ax1.set_xticks(range(len(configs)))
+    ax1.set_xticklabels(configs, rotation=45, ha='right')
+    ax1.set_ylabel('Compression Ratio')
+    ax1.set_title('(a) Compression Achievement by Configuration')
+    ax1.axhline(y=450, color='red', linestyle='--', alpha=0.5, label='Target (450×)')
+    ax1.legend()
+    ax1.grid(True, alpha=0.3)
+
+    # Annotate bars
+    for i, (bar, val) in enumerate(zip(bars1, compression_ratios)):
+        ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height(),
+                 f'{val:.0f}×', ha='center', va='bottom', fontsize=8)
+
+    # 2. Generation Perplexity Impact
+    ax2 = axes[0, 1]
+    ppl_increase = [(p / baseline_gen_ppl - 1) * 100 for p in gen_ppls]
+    colors = ['green' if inc < 5 else 'orange' if inc < 10 else 'red' for inc in ppl_increase]
+    bars2 = ax2.bar(range(len(configs)), ppl_increase, color=colors)
+    ax2.set_xticks(range(len(configs)))
+    ax2.set_xticklabels(configs, rotation=45, ha='right')
+    ax2.set_ylabel('PPL Increase (%)')
+    ax2.set_title('(b) Quality Degradation from Baseline')
+    ax2.axhline(y=0, color='black', linestyle='-', alpha=0.5)
+    ax2.axhline(y=10, color='red', linestyle='--', alpha=0.5, label='10% threshold')
+    ax2.legend()
+    ax2.grid(True, alpha=0.3)
+
+    # Annotate
+    for i, (bar, val) in enumerate(zip(bars2, ppl_increase)):
+        ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height(),
+                 f'+{val:.1f}%', ha='center', va='bottom', fontsize=8)
+
+    # 3. Memory Savings
+    ax3 = axes[0, 2]
+    memory_reduction = [(1 - m/baseline_kv_memory) * 100 for m in kv_memories]
+    bars3 = ax3.bar(range(len(configs)), memory_reduction, color='darkgreen')
+    ax3.set_xticks(range(len(configs)))
+    ax3.set_xticklabels(configs, rotation=45, ha='right')
+    ax3.set_ylabel('Memory Reduction (%)')
+    ax3.set_title('(c) KV Cache Memory Savings')
+    ax3.grid(True, alpha=0.3)
+
+    # Annotate
+    for i, (bar, val) in enumerate(zip(bars3, memory_reduction)):
+        ax3.text(bar.get_x() + bar.get_width()/2, bar.get_height(),
+                 f'-{val:.1f}%', ha='center', va='bottom', fontsize=8)
+
+    # 4. Decode Latency
+    ax4 = axes[1, 0]
+    speedup = [baseline_decode_time / d for d in decode_times]
+    bars4 = ax4.bar(range(len(configs)), speedup, color='purple')
+    ax4.set_xticks(range(len(configs)))
+    ax4.set_xticklabels(configs, rotation=45, ha='right')
+    ax4.set_ylabel('Speedup Factor')
+    ax4.set_title('(d) Decode Speedup vs Baseline')
+    ax4.axhline(y=1.0, color='black', linestyle='-', alpha=0.5, label='Baseline')
+    ax4.legend()
+    ax4.grid(True, alpha=0.3)
+
+    # Annotate
+    for i, (bar, val) in enumerate(zip(bars4, speedup)):
+        ax4.text(bar.get_x() + bar.get_width()/2, bar.get_height(),
+                 f'{val:.2f}×', ha='center', va='bottom', fontsize=8)
+
+    # 5. End-to-End Throughput
+    ax5 = axes[1, 1]
+    throughput_gain = [(t / baseline_throughput - 1) * 100 if baseline_throughput > 0 else 0 for t in throughputs]
+    bars5 = ax5.bar(range(len(configs)), throughput_gain, color='coral')
+    ax5.set_xticks(range(len(configs)))
+    ax5.set_xticklabels(configs, rotation=45, ha='right')
+    ax5.set_ylabel('Throughput Increase (%)')
+    ax5.set_title('(e) End-to-End Throughput Gain')
+    ax5.axhline(y=0, color='black', linestyle='-', alpha=0.5)
+    ax5.grid(True, alpha=0.3)
+
+    # Annotate
+    for i, (bar, val) in enumerate(zip(bars5, throughput_gain)):
+        ax5.text(bar.get_x() + bar.get_width()/2, bar.get_height() if val > 0 else 0,
+                 f'+{val:.1f}%', ha='center', va='bottom' if val > 0 else 'top', fontsize=8)
+
+    # 6. Component Contribution Analysis
+    ax6 = axes[1, 2]
+
+    # Calculate component contributions
+    full_compression = next((compression_ratios[i] for i, c in enumerate(configs) if c == 'full_system'), 1)
+    contributions = {}
+
+    for config in configs:
+        if config != 'full_system' and config != 'baseline':
+            comp = ablation_results[config]['summary']['compression_ratio']
+            contributions[config] = (full_compression / comp - 1) * 100 if comp > 0 else 0
+
+    if contributions:
+        sorted_contribs = sorted(contributions.items(), key=lambda x: x[1], reverse=True)
+        config_names = [c[0] for c in sorted_contribs]
+        contrib_values = [c[1] for c in sorted_contribs]
+
+        bars6 = ax6.barh(range(len(config_names)), contrib_values, color='teal')
+        ax6.set_yticks(range(len(config_names)))
+        ax6.set_yticklabels(config_names)
+        ax6.set_xlabel('Compression Contribution (%)')
+        ax6.set_title('(f) Component Importance for 450× Target')
+        ax6.grid(True, alpha=0.3)
+
+        # Annotate
+        for i, (bar, val) in enumerate(zip(bars6, contrib_values)):
+            ax6.text(bar.get_width(), bar.get_y() + bar.get_height()/2,
+                     f' {val:.1f}%', ha='left', va='center', fontsize=8)
+
+    plt.suptitle('Ablation Study: Component Analysis for 450× Compression on GPT-Neo',
+                 fontsize=14, fontweight='bold')
+    plt.tight_layout()
+
+    # Save to file
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    plot_path = os.path.join(tempfile.gettempdir(), f"ablation_study_{timestamp}.png")
+    plt.savefig(plot_path, dpi=150, bbox_inches='tight')
+    plt.close()
+
+    logger.info(f"Ablation study plots saved: {plot_path}")
+    return plot_path
+
 def generate_comparison_plots(summaries: Dict[str, Any], metrics_dict: Dict[str, Any] = None) -> str:
     """Generate publication-grade comparison plots. Returns filepath."""
     fig, axes = plt.subplots(1, 3, figsize=(16, 5))
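The plotting function above indexes `ablation_results[config]['summary']` for five keys. A hedged usage sketch of the minimal input shape it expects, with placeholder numbers (not measurements) and assuming `plot_ablation_results` from app.py is in scope:

```python
# Dummy input shapes for plot_ablation_results; the values are placeholders.
def fake_summary(ratio: float, ppl: float) -> dict:
    return {
        "compression_ratio": ratio,
        "generation_perplexity": ppl,
        "decode_time_ms": 50.0 / ratio ** 0.5,  # arbitrary illustrative numbers
        "kv_cache_memory_mb": 100.0 / ratio,
        "end_to_end_throughput": 20.0,
    }

ablation_results = {
    "baseline": {"summary": fake_summary(1.0, 12.0)},
    "full_system": {"summary": fake_summary(450.0, 13.1)},
    "no_head_compression": {"summary": fake_summary(210.0, 12.6)},
}
plot_path = plot_ablation_results(ablation_results, ablation_results["baseline"]["summary"])
```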
@@ -2310,6 +2493,151 @@ def load_real_dataset_samples(config: CompressionConfig, tokenizer) -> List[str]
     logger.info(f"Loaded {len(texts)} text samples from {config.dataset_name}")
     return texts
 
+def run_ablation_study(model_name: str, base_config: CompressionConfig, dataset_texts: List[str]) -> Dict[str, Any]:
+    """Run comprehensive ablation study testing individual components."""
+    logger.info("Starting ablation study for Enhanced SPG components")
+
+    ablation_results = {}
+    ablation_config = base_config.ablation
+
+    # Test configurations
+    test_configs = []
+
+    # Baseline (no compression)
+    test_configs.append(("baseline", {
+        "compression_type": CompressionType.NONE,
+        "description": "No compression baseline"
+    }))
+
+    # Full system
+    test_configs.append(("full_system", {
+        "compression_type": CompressionType.ENHANCED_SPG,
+        "description": "Full Enhanced SPG system"
+    }))
+
+    if ablation_config.test_stage1_only:
+        test_configs.append(("stage1_only", {
+            "compression_type": CompressionType.ENHANCED_SPG,
+            "enable_two_stage": True,
+            "stage2_compression_ratio": 1.0,  # Effectively disable Stage 2
+            "description": "Stage 1 only"
+        }))
+
+    if ablation_config.test_stage2_only:
+        test_configs.append(("stage2_only", {
+            "compression_type": CompressionType.ENHANCED_SPG,
+            "enable_two_stage": True,
+            "stage1_compression_ratio": 1.0,  # Effectively disable Stage 1
+            "description": "Stage 2 only"
+        }))
+
+    if ablation_config.test_no_head_compression:
+        test_configs.append(("no_head_compression", {
+            "compression_type": CompressionType.ENHANCED_SPG,
+            "enable_head_compression": False,
+            "description": "No head compression"
+        }))
+
+    if ablation_config.test_no_adaptive_decomp:
+        test_configs.append(("no_adaptive_decomp", {
+            "compression_type": CompressionType.ENHANCED_SPG,
+            "use_adaptive_decomposition": False,
+            "description": "No adaptive decomposition"
+        }))
+
+    if ablation_config.test_no_hsa:
+        test_configs.append(("no_hsa", {
+            "compression_type": CompressionType.ENHANCED_SPG,
+            "use_hybrid_sparse_attention": False,
+            "description": "No hybrid sparse attention"
+        }))
+
+    if ablation_config.test_no_snapkv:
+        test_configs.append(("no_snapkv", {
+            "compression_type": CompressionType.ENHANCED_SPG,
+            "use_snapkv_plus_plus": False,
+            "description": "No SnapKV++"
+        }))
+
+    if ablation_config.test_conservative_precision:
+        test_configs.append(("conservative_precision", {
+            "compression_type": CompressionType.ENHANCED_SPG,
+            "use_aggressive_precision": False,
+            "description": "Conservative precision levels"
+        }))
+
+    if ablation_config.test_conservative_magnitude:
+        test_configs.append(("conservative_magnitude", {
+            "compression_type": CompressionType.ENHANCED_SPG,
+            "magnitude_threshold_mode": "conservative",
+            "description": "Conservative magnitude threshold"
+        }))
+
+    if ablation_config.test_no_recent_window:
+        test_configs.append(("no_recent_window", {
+            "compression_type": CompressionType.ENHANCED_SPG,
+            "recent_window": 0,
+            "description": "No recent window protection"
+        }))
+
+    if ablation_config.test_reduced_fp16_heads:
+        test_configs.append(("reduced_fp16_heads", {
+            "compression_type": CompressionType.ENHANCED_SPG,
+            "head_fp16_reserve": 1,
+            "description": "Reduced FP16 reserved heads"
+        }))
+
+    # Test each configuration
+    for config_name, config_overrides in test_configs:
+        logger.info(f"Testing ablation config: {config_name}")
+
+        # Create modified config
+        from dataclasses import replace
+        test_config = replace(base_config)
+        test_config.eval_samples = ablation_config.eval_samples_per_config
+        test_config.n_seeds = ablation_config.n_seeds
+
+        # Apply overrides
+        if "compression_type" in config_overrides:
+            test_config.compression_type = config_overrides["compression_type"]
+
+        # Apply Enhanced SPG config overrides
+        if test_config.compression_type in [CompressionType.ENHANCED_SPG, CompressionType.PROGRESSIVE_SPG]:
+            spg_config_dict = {}
+            for key, value in config_overrides.items():
+                if key not in ["compression_type", "description"]:
+                    spg_config_dict[key] = value
+
+            if spg_config_dict:
+                test_config.enhanced_spg_config = replace(
+                    base_config.enhanced_spg_config,
+                    **spg_config_dict
+                )
+
+        # Run benchmark
+        try:
+            metrics, summary, per_sample_records, per_layer_fingerprints = run_research_benchmark(
+                model_name, test_config, dataset_texts=dataset_texts
+            )
+
+            ablation_results[config_name] = {
+                "config": config_overrides,
+                "metrics": metrics,
+                "summary": summary,
+                "description": config_overrides.get("description", "")
+            }
+
+        except Exception as e:
+            logger.error(f"Failed ablation test {config_name}: {e}")
+            ablation_results[config_name] = {
+                "config": config_overrides,
+                "error": str(e),
+                "description": config_overrides.get("description", "")
+            }
+
+    logger.info("Ablation study complete")
+    return ablation_results
+
 def run_research_benchmark(model_name: str, config: CompressionConfig, dataset_texts: Optional[List[str]] = None) -> Tuple[BenchmarkMetrics, Dict, List[Dict], List[Dict]]:
     """Research-grade benchmark with enhanced SPG support and fail-fast validation. Returns metrics, summary, and proof records."""
     logger.info(f"Starting research benchmark: {model_name} with {config.compression_type.value}")
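One consequence of the fail-soft loop above: a failed configuration is recorded as an `"error"` entry rather than raised, so consumers must guard for it. A small sketch under that assumption (hypothetical helper, matching the dict shape built above):

```python
# Sketch: summarizing run_ablation_study output; assumes the shape built above.
def summarize_ablation(ablation_results: dict) -> None:
    for name, result in ablation_results.items():
        if "error" in result:
            print(f"{name}: FAILED ({result['error']})")
            continue
        s = result["summary"]
        print(f"{name}: {s['compression_ratio']:.1f}x, gen PPL {s['generation_perplexity']:.2f}")
```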
@@ -2645,12 +2973,12 @@ def run_research_benchmark(model_name: str, config: CompressionConfig, dataset_t
 
     return final_metrics, summary, per_sample_records, per_layer_fingerprints
 
-def generate_latex_table(results: List[Dict[str, Any]]) -> str:
-    """Generate LaTeX table with enhanced SPG results."""
+def generate_latex_table(results: List[Dict[str, Any]], ablation_results: Dict[str, Any] = None) -> str:
+    """Generate LaTeX table with enhanced SPG results and optional ablation study."""
     latex = r"""\begin{table}[htbp]
 \centering
-\caption{Enhanced SPG: Research Standards Compliant 450x Compression on GPT-Neo}
-\label{tab:
+\caption{Enhanced SPG: Research Standards Compliant 450x Compression on GPT-Neo with Ablation Study}
+\label{tab:enhanced_spg_450x_ablation_gptneo}
 \begin{tabular}{lcccccccc}
 \toprule
 Method & Peak Mem. & KV Mem. & Decode & Prefill PPL & Gen. PPL & Compr. & Bits/Token & Aux. OH \\
@@ -2677,15 +3005,34 @@ Method & Peak Mem. & KV Mem. & Decode & Prefill PPL & Gen. PPL & Compr. & Bits/T
 
         latex += f"{method} & {peak_mem} & {kv_mem} & {decode} & {prefill_ppl} & {gen_ppl} & {comp} & {bits_per_token} & {aux_overhead} \\\\\n"
 
+    # Add ablation results if provided
+    if ablation_results:
+        latex += r"\midrule" + "\n"
+        latex += r"\multicolumn{9}{c}{\textbf{Ablation Study Results}} \\" + "\n"
+        latex += r"\midrule" + "\n"
+
+        for config_name, result in ablation_results.items():
+            if 'summary' in result:
+                method = config_name.replace('_', r'\_')
+                summary = result['summary']
+                peak_mem = f"{summary.get('peak_memory_mb', 0):.1f}"
+                kv_mem = f"{summary.get('kv_cache_memory_mb', 0):.1f}"
+                decode = f"{summary.get('decode_time_ms', 0):.2f}"
+                prefill_ppl = f"{summary.get('prefill_perplexity', 0):.2f}"
+                gen_ppl = f"{summary.get('generation_perplexity', 0):.2f}"
+                comp = f"{summary.get('compression_ratio', 1.0):.1f}$\\times$"
+
+                latex += f"{method} & {peak_mem} & {kv_mem} & {decode} & {prefill_ppl} & {gen_ppl} & {comp} & - & - \\\\\n"
+
     latex += r"""\bottomrule
 \end{tabular}
-\parbox{\textwidth}{\footnotesize Enhanced SPG achieving 450x compression on GPT-Neo with full non-negotiables compliance}
+\parbox{\textwidth}{\footnotesize Enhanced SPG achieving 450x compression on GPT-Neo with full non-negotiables compliance and component ablation}
 \end{table}"""
 
     return latex
 
 def create_research_interface():
-    """Research-grade interface for GPT-Neo with STRICT non-negotiables compliance and
+    """Research-grade interface for GPT-Neo with STRICT non-negotiables compliance, proving protocol, and ablation study."""
 
     def run_benchmark(model_variant, compression_types, seq_length, eval_samples,
                       dataset_name, dataset_config,
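For reference, the table this function emits has the skeleton below once ablation rows are appended. The row values here are illustrative placeholders only (underscores in config names are escaped exactly as the code does):

```latex
\begin{table}[htbp]
\centering
\caption{Enhanced SPG: Research Standards Compliant 450x Compression on GPT-Neo with Ablation Study}
\label{tab:enhanced_spg_450x_ablation_gptneo}
\begin{tabular}{lcccccccc}
\toprule
Method & Peak Mem. & KV Mem. & Decode & Prefill PPL & Gen. PPL & Compr. & Bits/Token & Aux. OH \\
\midrule
\multicolumn{9}{c}{\textbf{Ablation Study Results}} \\
\midrule
no\_head\_compression & 2101.3 & 12.4 & 48.20 & 11.95 & 12.61 & 210.0$\times$ & - & - \\
\bottomrule
\end{tabular}
\parbox{\textwidth}{\footnotesize Enhanced SPG achieving 450x compression on GPT-Neo with full non-negotiables compliance and component ablation}
\end{table}
```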
@@ -2702,8 +3049,9 @@ def create_research_interface():
                       sequence_compression_ratio, head_compression_ratio,
                       generate_latex, n_bootstrap, n_seeds, enable_proving,
                       enable_ratio_sweep, ratio_sweep_points,
+                      enable_ablation, ablation_samples_per_config, ablation_n_seeds,
                       progress=gr.Progress()):
-        """Run 450x compression benchmark with FULL compliance and
+        """Run 450x compression benchmark with FULL compliance, proving protocol, and ablation study."""
 
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model_name = f"EleutherAI/gpt-neo-{model_variant}"
@@ -2713,6 +3061,7 @@ def create_research_interface():
         all_summaries = {}
         all_per_sample_records = {}
         all_per_layer_fingerprints = {}
+        ablation_results = {}
 
         # For ratio sweep
         summaries_by_ratio = {}
@@ -2740,7 +3089,8 @@ def create_research_interface():
                 "configurable_parameters": True,
                 "fail_on_cpu_fallback": True,  # STRICT COMPLIANCE
                 "no_proxy_metrics": True,
-                "proving_enabled": enable_proving
+                "proving_enabled": enable_proving,
+                "ablation_enabled": enable_ablation
             },
             "target_compression": target_compression_ratio
         }
@@ -2751,23 +3101,72 @@ def create_research_interface():
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
 
-
-
-
+        # Create base config
+        base_config = CompressionConfig(
+            compression_type=CompressionType.ENHANCED_SPG,
+            seed=42,
             eval_samples=eval_samples,
+            prefill_length=seq_length,
+            generation_length=64,
+            n_seeds=n_seeds,
+            n_bootstrap=n_bootstrap,
+            generate_latex=generate_latex,
             dataset_name=dataset_name,
             dataset_config=dataset_config if dataset_config else None,
-
-
+            enhanced_spg_config=EnhancedSPGConfig(
+                base_decay_rate=spg_decay_rate,
+                enable_adaptive=spg_enable_adaptive,
+                target_perplexity_delta=spg_target_ppl,
+                enable_two_stage=enhanced_enable_two_stage,
+                stage1_compression_ratio=enhanced_stage1_ratio,
+                stage2_compression_ratio=enhanced_stage2_ratio,
+                enable_head_compression=enhanced_enable_head_compression,
+                enable_progressive=enhanced_enable_progressive,
+                initial_compression_ratio=enhanced_initial_compression,
+                max_compression_ratio=enhanced_max_compression,
+                target_compression_ratio=target_compression_ratio,
+                use_adaptive_decomposition=use_adaptive_decomposition,
+                use_hybrid_sparse_attention=use_hybrid_sparse_attention,
+                use_snapkv_plus_plus=use_snapkv_plus_plus,
+                head_retention_mode=head_retention_mode,
+                magnitude_threshold_mode=magnitude_threshold_mode,
+                use_aggressive_precision=use_aggressive_precision,
+                sequence_compression_ratio=sequence_compression_ratio,
+                head_compression_ratio=head_compression_ratio,
+                quality_feedback_frequency=quality_feedback_frequency,
+                recent_boost_factor=recent_boost_factor,
+                progressive_min_ratio=progressive_min_ratio,
+                min_tokens_for_stability=min_tokens_for_stability,
+                stage_compression_min=stage_compression_min,
+                stage_compression_max=stage_compression_max,
+                recent_window=recent_window,
+                recent_min_precision=1.0,
+                head_fp16_reserve=head_fp16_reserve,
+                quality_threshold=0.01
+            ),
+            fail_on_cpu_fallback=True,
+            proving=ProvingConfig(enabled=enable_proving),
+            ablation=AblationConfig(
+                enabled=enable_ablation,
+                eval_samples_per_config=ablation_samples_per_config,
+                n_seeds=ablation_n_seeds
+            )
         )
-
+
+        shared_texts = load_real_dataset_samples(base_config, tokenizer)
 
         progress(0.1, desc=f"Starting 450x compression benchmark on GPT-Neo {model_variant}...")
 
+        # Run ablation study if enabled
+        if enable_ablation:
+            progress(0.1, desc="Running ablation study...")
+            ablation_results = run_ablation_study(model_name, base_config, shared_texts)
+            progress(0.3, desc="Ablation study complete, continuing with main benchmark...")
+
         # Loop over compression ratios if sweep enabled
         for ratio_idx, test_ratio in enumerate(compression_ratios):
             if enable_ratio_sweep:
-                progress((0.
+                progress((0.3 + 0.5 * ratio_idx / len(compression_ratios)),
                          desc=f"Testing ratio {test_ratio}x...")
 
             ratio_summaries = {}
@@ -2775,7 +3174,7 @@ def create_research_interface():
 
             for i, comp_type in enumerate(compression_types):
                 if not enable_ratio_sweep:
-                    progress((0.
+                    progress((0.3 + 0.6 * i / len(compression_types)), desc=f"Evaluating {comp_type}...")
 
                 # Skip NONE for non-1x ratios in sweep
                 if enable_ratio_sweep and comp_type == "NONE" and test_ratio != 1:
@@ -2819,9 +3218,9 @@ def create_research_interface():
                     stage_compression_min=stage_compression_min,
                     stage_compression_max=stage_compression_max,
                     recent_window=recent_window,
-                    recent_min_precision=1.0,
+                    recent_min_precision=1.0,
                     head_fp16_reserve=head_fp16_reserve,
-                    quality_threshold=0.01
+                    quality_threshold=0.01
                 )
 
                 config = CompressionConfig(
@@ -2892,13 +3291,14 @@ def create_research_interface():
 
         df = pd.DataFrame(results)
 
-        # Prepare export data
+        # Prepare export data
        export_data = {
             "configuration": benchmark_config,
             "results": all_summaries,
             "summary_table": results,
             "statistical_tests": {},
-            "compression_sweep": {str(k): v for k, v in summaries_by_ratio.items()} if enable_ratio_sweep and summaries_by_ratio else None
+            "compression_sweep": {str(k): v for k, v in summaries_by_ratio.items()} if enable_ratio_sweep and summaries_by_ratio else None,
+            "ablation_study": ablation_results if enable_ablation else None
         }
 
         # Add statistical comparisons to export
@@ -2934,12 +3334,12 @@ def create_research_interface():
                     'prefill_perplexity': float(result_summary["Prefill PPL"]),
                     'generation_perplexity': float(result_summary["Gen. PPL"]),
                     'compression_ratio': float(result_summary["Compression Ratio"][:-1]),
-                    'spg_avg_bits_per_token': 16.0,
+                    'spg_avg_bits_per_token': 16.0,
                     'enhanced_spg_auxiliary_overhead_mb': all_summaries[comp_type].get('enhanced_spg_measured_auxiliary_overhead_mb', 0)
                 })
 
         if latex_results:
-            latex_output = generate_latex_table(latex_results)
+            latex_output = generate_latex_table(latex_results, ablation_results if enable_ablation else None)
             export_data["latex_table"] = latex_output
 
         # Determine achieved compression
@@ -2960,22 +3360,22 @@ def create_research_interface():
         proof_bundle_path = None
         verification_result = None
         plots_path = None
+        ablation_plots_path = None
         verification_msg = ""
 
         if enable_proving and all_per_sample_records:
             try:
-                # Include
+                # Include all methods' records
                 combined_records = []
                 combined_fingerprints = []
                 methods_in_bundle = []
 
-                # Add all methods' records (baseline + optimized)
                 for method in all_per_sample_records:
                     combined_records.extend(all_per_sample_records[method])
                     combined_fingerprints.extend(all_per_layer_fingerprints.get(method, []))
                     methods_in_bundle.append(method)
 
-                # Choose primary method for verification
+                # Choose primary method for verification
                 if "PROGRESSIVE_SPG" in all_summaries:
                     method_for_proof = "PROGRESSIVE_SPG"
                 elif "ENHANCED_SPG" in all_summaries:
@@ -2986,31 +3386,29 @@ def create_research_interface():
 
                 logger.info(f"Proof bundle includes: {methods_in_bundle}, verifying: {method_for_proof}")
 
-                # Use primary method's summary for verification
                 summary_for_proof = all_summaries[method_for_proof]
                 metrics_for_proof = all_metrics[method_for_proof]
 
-                # Add extra metadata to summary
                 summary_for_proof["methods_included"] = methods_in_bundle
                 summary_for_proof["primary_method"] = method_for_proof
                 if "NONE" in all_summaries:
                     summary_for_proof["baseline_kv_mb"] = all_summaries["NONE"].get("kv_cache_memory_mb", 0)
                     summary_for_proof["baseline_decode_ms"] = all_summaries["NONE"].get("decode_time_ms", 0)
 
-                # Export proof bundle
+                # Export proof bundle
                 bundle_dir = os.path.join(tempfile.gettempdir(), f"proof_bundle_{datetime.now().strftime('%Y%m%d_%H%M%S')}")
                 proof_bundle_path = export_proof_bundle(
                     bundle_dir,
-
-                    metrics_for_proof,
-                    summary_for_proof,
-                    combined_records,
-                    combined_fingerprints
+                    base_config,
+                    metrics_for_proof,
+                    summary_for_proof,
+                    combined_records,
+                    combined_fingerprints
                 )
 
-                # Verify the
+                # Verify the bundle
                 verification_result = verify_proof_bundle(
-                    bundle_dir,
+                    bundle_dir, base_config, base_config.proving
                 )
 
                 if verification_result["ok"]:
@@ -3019,7 +3417,6 @@ def create_research_interface():
                 else:
                     verification_msg = f"❌ **Proof Verification: FAILED**\n{verification_result['failures']}"
                     logger.error(f"PROOF VERIFICATION FAILED: {verification_result['failures']}")
-                    # In CI, this would hard-fail
                     if os.environ.get("CI") == "true":
                         raise RuntimeError(f"CI VERIFICATION FAILED: {verification_result['failures']}")
 
@@ -3047,6 +3444,14 @@ def create_research_interface():
             logger.error(f"Failed to generate trade-off plots: {e}")
             tradeoff_path = None
 
+        # Generate ablation plots if ablation study was done
+        if enable_ablation and ablation_results and "baseline" in ablation_results:
+            try:
+                ablation_plots_path = plot_ablation_results(ablation_results, ablation_results["baseline"]["summary"])
+            except Exception as e:
+                logger.error(f"Failed to generate ablation plots: {e}")
+                ablation_plots_path = None
+
         # Get layer count for display
         n_layers = {
             "125M": 12,
@@ -3054,6 +3459,14 @@ def create_research_interface():
             "2.7B": 32
         }.get(model_variant, "?")
 
+        # Prepare ablation summary text
+        ablation_text = ""
+        if enable_ablation and ablation_results:
+            ablation_text = "\n\n**Ablation Study Results:**"
+            for config_name, result in ablation_results.items():
+                if 'summary' in result:
+                    ablation_text += f"\n- {config_name}: {result['summary']['compression_ratio']:.1f}× compression, {result['summary']['generation_perplexity']:.2f} PPL"
+
         summary_text = f"""
 ## 🎯 450x Compression on GPT-Neo {model_variant} with FULL Non-Negotiables Compliance
 
@@ -3074,6 +3487,7 @@ def create_research_interface():
 {'✅ Proof bundle generated' if proof_bundle_path else ''}
 {verification_msg}
 {'✅ Compression trade-off plots generated' if tradeoff_path else ''}
+{'✅ Ablation study completed' if enable_ablation else ''}
 
 **GPT-Neo Specific Settings:**
 - {n_layers} transformer layers (auto-detected)
@@ -3082,6 +3496,7 @@ def create_research_interface():
 - Recent Window: {recent_window} tokens
 - Stage 1 Compression: {enhanced_stage1_ratio}x
 - Stage 2 Compression: {enhanced_stage2_ratio}x
+{ablation_text}
 """
 
         # Prepare trade-off data for export
@@ -3099,7 +3514,7 @@ def create_research_interface():
             }
         }
 
-        return df, summary_text, latex_output, export_data, proof_bundle_path, plots_path, tradeoff_path, tradeoff_data
+        return df, summary_text, latex_output, export_data, proof_bundle_path, plots_path, tradeoff_path, tradeoff_data, ablation_plots_path
 
     def save_json_file(json_data):
         """Create downloadable JSON file."""
@@ -3122,9 +3537,9 @@ def create_research_interface():
 
         return filepath
 
-    with gr.Blocks(title="GPT-Neo Enhanced SPG: 450x Compression
+    with gr.Blocks(title="GPT-Neo Enhanced SPG: 450x Compression with Ablation Study", theme=gr.themes.Soft()) as demo:
         gr.Markdown(f"""
-        # 🎯 GPT-Neo Enhanced SPG: 450x Compression with
+        # 🎯 GPT-Neo Enhanced SPG: 450x Compression with Ablation Study
 
         **GPT-Neo Capabilities:**
         - **Max Sequence Length:** {GPT_NEO_MAX_SEQUENCE_LENGTH} tokens (full 2048 context)
@@ -3142,6 +3557,7 @@ def create_research_interface():
         - ✅ NO fake results - Reproducible
         - ✅ Clean code - Full validation
         - ✅ Hardware validation - GPU memory checked
+        - 🔬 **NEW**: Component Ablation Study
         """)
 
         with gr.Row():
@@ -3218,6 +3634,26 @@
                     sequence_compression_ratio = gr.Slider(0.0001, 0.001, value=0.00018, step=0.00002, label="Sequence Ratio")
                     head_compression_ratio = gr.Slider(0.0001, 0.001, value=0.00018, step=0.00002, label="Head Ratio")
 
+                with gr.Accordion("🔬 Ablation Study Settings (NEW)", open=False):
+                    enable_ablation = gr.Checkbox(label="Enable Ablation Study", value=True)
+                    gr.Markdown("**Ablation Study will test:**")
+                    gr.Markdown("""
+                    - Baseline (no compression)
+                    - Stage 1 only
+                    - Stage 2 only
+                    - No head compression
+                    - No adaptive decomposition
+                    - No hybrid sparse attention
+                    - No SnapKV++
+                    - Conservative precision levels
+                    - Conservative magnitude threshold
+                    - No recent window protection
+                    - Reduced FP16 reserved heads
+                    """)
+                    with gr.Row():
+                        ablation_samples_per_config = gr.Slider(3, 10, value=5, step=1, label="Samples per Ablation Config")
+                        ablation_n_seeds = gr.Slider(1, 3, value=2, step=1, label="Seeds for Ablation")
+
                 with gr.Accordion("Compliance Parameters (NO HARDCODING)", open=False):
                     quality_feedback_frequency = gr.Slider(1, 64, value=16, step=1, label="Quality Feedback Frequency")
                     recent_boost_factor = gr.Slider(0.0, 1.0, value=0.1, step=0.01, label="Recent Boost Factor")
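For readers unfamiliar with how these new inputs reach the handler, a minimal self-contained Gradio sketch of the checkbox-plus-sliders-into-click pattern the hunk above extends (toy handler and labels, not app.py's):

```python
# Minimal sketch of the Gradio wiring pattern used above; the handler is a toy.
import gradio as gr

def toy_run(enable_ablation: bool, samples: float, seeds: float) -> str:
    # Sliders deliver numbers; the real run_benchmark receives them the same way.
    return f"ablation={'on' if enable_ablation else 'off'}, {int(samples)} samples x {int(seeds)} seeds"

with gr.Blocks() as toy_demo:
    enable = gr.Checkbox(label="Enable Ablation Study", value=True)
    samples = gr.Slider(3, 10, value=5, step=1, label="Samples per Ablation Config")
    seeds = gr.Slider(1, 3, value=2, step=1, label="Seeds for Ablation")
    out = gr.Textbox(label="Status")
    gr.Button("Run").click(toy_run, inputs=[enable, samples, seeds], outputs=[out])

# toy_demo.launch()
```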
@@ -3238,7 +3674,7 @@
                 ratio_sweep_points = gr.Slider(3, 8, value=5, step=1,
                                                label="Sweep Points (1× to 450×)")
 
-                run_button = gr.Button("🎯 Run GPT-Neo 450x Benchmark
+                run_button = gr.Button("🎯 Run GPT-Neo 450x Benchmark with Ablation", variant="primary")
 
             with gr.Column(scale=2):
                 results_table = gr.DataFrame(label="GPT-Neo 450x Compression Results")
@@ -3264,6 +3700,9 @@
                 tradeoff_json = gr.JSON(label="Trade-off Data", visible=False)
                 export_tradeoff_button = gr.Button("📊 Export Trade-off Data", variant="secondary")
                 download_tradeoff_file = gr.File(label="Download Trade-off JSON", visible=False)
+
+                with gr.Accordion("🔬 Ablation Study Results (NEW)", open=False):
+                    ablation_plots = gr.Image(label="Component Contribution Analysis", type="filepath")
 
         # Connect the benchmark
         benchmark_outputs = run_button.click(
@@ -3282,9 +3721,11 @@
                 min_tokens_for_stability, stage_compression_min, stage_compression_max,
                 sequence_compression_ratio, head_compression_ratio,
                 generate_latex, n_bootstrap, n_seeds, enable_proving,
-                enable_ratio_sweep, ratio_sweep_points
+                enable_ratio_sweep, ratio_sweep_points,
+                enable_ablation, ablation_samples_per_config, ablation_n_seeds],
             outputs=[results_table, summary_output, latex_output, json_output,
-                     proof_bundle_file, plots_image, tradeoff_plots, tradeoff_json
+                     proof_bundle_file, plots_image, tradeoff_plots, tradeoff_json,
+                     ablation_plots]
         )
 
         # Export functionality
@@ -3308,7 +3749,27 @@
         )
 
         gr.Markdown(f"""
-        ### 🔬
+        ### 🔬 Ablation Study Details
+
+        **Component Analysis:**
+        The ablation study systematically tests each component's contribution to achieving 450× compression:
+
+        - **Stage 1 (Permanent Eviction)**: Tests SnapKV++ and magnitude-guided token selection
+        - **Stage 2 (Multi-dimensional)**: Tests hybrid sparse attention and head compression
+        - **Precision Levels**: Compares aggressive INT4 floor vs conservative FP16/INT8
+        - **Magnitude Thresholds**: Tests extreme (0.1%) vs conservative (1%) thresholds
+        - **Position Awareness**: Tests impact of recent window and sink token protection
+        - **Head Selection**: Tests reserved FP16 heads for critical attention patterns
+
+        **Metrics Evaluated:**
+        - Compression ratio achievement
+        - Generation perplexity degradation
+        - Memory reduction percentage
+        - Decode speedup factor
+        - End-to-end throughput gain
+        - Component importance ranking
+
+        ### 🔬 GPT-Neo Architecture Details
 
         **Model Specifications:**
         - **GPT-Neo 125M**: 12 layers, 768 hidden dim, 12 heads
@@ -3321,19 +3782,6 @@
        - **1.3B**: Minimum 6GB VRAM
        - **2.7B**: Minimum 12GB VRAM (16GB+ recommended)
 
-        **Optimal Datasets for GPT-Neo:**
-        - **WikiText**: Clean Wikipedia articles
-        - **OpenWebText**: High-quality web text (GPT-2 training data recreation)
-        - **The Pile**: 800GB diverse text corpus
-        - **C4**: Colossal Clean Crawled Corpus
-
-        **Compression Adjustments for GPT-Neo:**
-        - Adjusted stage compression ratios for architecture
-        - Optimized recent window for layer count
-        - Reserved FP16 heads tuned per model size
-        - Memory cleanup for 2.7B model
-        - Full 2048 token context support
-
        ### 📦 Proving Protocol Features
 
        **Attestable Proof Bundle (.zip) contains:**
@@ -3341,6 +3789,7 @@
        - Per-sample raw measurements
        - Layer-level compression fingerprints
        - Exact package versions for reproducibility
+        - Ablation study results (if enabled)
 
        **Verification:**
        - Recomputes summary from raw records
@@ -3348,7 +3797,7 @@
        - Checks numerical tolerances
        - Hard-fails in CI if verification fails
 
-        This ensures research-grade reproducibility on GPT-Neo models with full 2048 token context.
+        This ensures research-grade reproducibility on GPT-Neo models with full 2048 token context and component analysis.
        """)
 
    return demo
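The Verification bullets above describe a recompute-and-compare loop. A generic sketch of that idea, not the app's actual `verify_proof_bundle` (whose record schema is internal to app.py): recompute an aggregate from raw per-sample records and compare it to the claimed summary within a tolerance, raising on mismatch so CI hard-fails.

```python
# Generic recompute-and-compare sketch; field names are assumptions, not app.py's schema.
import statistics

def verify_summary(records: list, claimed_mean_ms: float, rel_tol: float = 1e-6) -> bool:
    recomputed = statistics.mean(r["decode_time_ms"] for r in records)
    ok = abs(recomputed - claimed_mean_ms) <= rel_tol * max(abs(claimed_mean_ms), 1e-12)
    if not ok:
        raise RuntimeError(f"summary mismatch: recomputed {recomputed} vs claimed {claimed_mean_ms}")
    return ok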