{
  "hardware_name": "2xA10G",
  "specs": {
    "gpu_count": 2,
    "gpu_type": "A10G",
    "vram_per_gpu": 24,
    "total_vram": 48,
    "vcpu_count": 24,
    "ram": 92
  },
  "training_optimizations": {
    "per_device_batch_size": 16,
    "gradient_accumulation_steps": 4,
    "effective_batch_size": 128,
    "memory_optimizations": {
      "use_gradient_checkpointing": true,
      "pin_memory": true,
      "num_workers": 2
    },
    "distributed_settings": {
      "device_map": "auto",
      "ddp_find_unused_parameters": false
    }
  },
  "memory_breakdown": {
    "model_size": "~3.5GB (pre-quantized 4-bit)",
    "optimizer_states": "~1GB",
    "batch_memory_per_gpu": "~2GB",
    "peak_memory_estimate": "18-20GB",
    "safe_headroom": "4-6GB"
  },
  "compute_environment": "A10G_CLOUD",
  "distributed_type": "DATA_PARALLEL",
  "mixed_precision": "bf16",
  "num_gpus": 2,
  "training_parameters": {
    "per_device_train_batch_size": 16,
    "gradient_accumulation_steps": 4,
    "dataloader_num_workers": 2,
    "dataloader_pin_memory": true,
    "gradient_checkpointing": true,
    "max_grad_norm": 1.0
  },
  "memory_optimization": {
    "offload_to_cpu": false,
    "use_flash_attention": true,
    "use_gradient_checkpointing": true
  }
}