Spaces:
Runtime error
Runtime error
File size: 1,451 Bytes
adb15f9 a57357b
{
"hardware_name": "4xL4",
"specs": {
"gpu_count": 4,
"gpu_type": "L4",
"vram_per_gpu": 24,
"total_vram": 96,
"vcpu_count": 48,
"ram": 186
},
"training_optimizations": {
"per_device_batch_size": 24,
"gradient_accumulation_steps": 2,
"effective_batch_size": 192,
"memory_optimizations": {
"use_gradient_checkpointing": true,
"pin_memory": true,
"num_workers": 4,
"use_flash_attention": true
},
"distributed_settings": {
"device_map": "auto",
"ddp_find_unused_parameters": false,
"use_fsdp": true,
"fsdp_config": {
"sharding_strategy": "FULL_SHARD",
"mixed_precision": "BF16",
"activation_checkpointing": true
}
}
},
"memory_breakdown": {
"model_size": "~3.5GB (pre-quantized 4-bit)",
"optimizer_states": "~1GB",
"batch_memory_per_gpu": "~3GB",
"peak_memory_estimate": "~18GB",
"safe_headroom": "~6GB"
},
"compute_environment": "L4_CLOUD",
"distributed_type": "FSDP",
"mixed_precision": "bf16",
"num_gpus": 4,
"training_parameters": {
"per_device_train_batch_size": 24,
"gradient_accumulation_steps": 2,
"dataloader_num_workers": 4,
"dataloader_pin_memory": true,
"gradient_checkpointing": true,
"max_grad_norm": 1.0
},
"memory_optimization": {
"offload_to_cpu": false,
"use_flash_attention": true,
"use_gradient_checkpointing": true
}
}