{
  "hardware_name": "2xA10G",
  "specs": {
    "gpu_count": 2,
    "gpu_type": "A10G",
    "vram_per_gpu": 24,
    "total_vram": 48,
    "vcpu_count": 24,
    "ram": 92
  },
  "training_optimizations": {
    "per_device_batch_size": 16,
    "gradient_accumulation_steps": 4,
    "effective_batch_size": 128,
    "memory_optimizations": {
      "use_gradient_checkpointing": true,
      "pin_memory": true,
      "num_workers": 2
    },
    "distributed_settings": {
      "device_map": "auto",
      "ddp_find_unused_parameters": false
    }
  },
  "memory_breakdown": {
    "model_size": "~3.5GB (pre-quantized 4-bit)",
    "optimizer_states": "~1GB",
    "batch_memory_per_gpu": "~2GB",
    "peak_memory_estimate": "18-20GB",
    "safe_headroom": "4-6GB"
  },
  "compute_environment": "A10G_CLOUD",
  "distributed_type": "DATA_PARALLEL",
  "mixed_precision": "bf16",
  "num_gpus": 2,
  "training_parameters": {
    "per_device_train_batch_size": 16,
    "gradient_accumulation_steps": 4,
    "dataloader_num_workers": 2,
    "dataloader_pin_memory": true,
    "gradient_checkpointing": true,
    "max_grad_norm": 1.0
  },
  "memory_optimization": {
    "offload_to_cpu": false,
    "use_flash_attention": true,
    "use_gradient_checkpointing": true
  }
}