{
  "model": {
    "name": "unsloth/phi-4-unsloth-bnb-4bit",
    "trust_remote_code": true,
    "use_fast_tokenizer": true
  },
  "tokenizer": {
    "chat_template": "phi",
    "max_seq_length": 2048,
    "padding_side": "right",
    "add_eos_token": true
  },
  "training": {
    "per_device_train_batch_size": 16,
    "gradient_accumulation_steps": 3,
    "learning_rate": 2e-5,
    "num_train_epochs": 3,
    "max_steps": -1,
    "logging_steps": 10,
    "save_steps": 200,
    "save_total_limit": 5,
    "push_to_hub": true,
    "hub_strategy": "every_save",
    "gradient_checkpointing": true,
    "optim": "adamw_torch",
    "lr_scheduler_type": "cosine",
    "warmup_ratio": 0.05,
    "weight_decay": 0.01,
    "max_grad_norm": 1.0,
    "neftune_noise_alpha": 5,
    "fp16": false,
    "bf16": true
  },
  "checkpointing": {
    "output_dir": "./results",
    "save_strategy": "steps",
    "save_steps": 100,
    "save_total_limit": 3,
    "hub_strategy": "every_save"
  },
  "unsloth": {
    "enabled": true,
    "r": 32,
    "alpha": 16,
    "dropout": 0,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ]
  },
  "distributed_training": {
    "fsdp_config": {
      "enabled": false,
      "sharding_strategy": "FULL_SHARD",
      "mixed_precision": "BF16",
      "activation_checkpointing": true,
      "offload_params": false
    },
    "ddp_find_unused_parameters": false,
    "dataloader_num_workers": 2,
    "ddp_config": {
      "enabled": true,
      "backend": "nccl",
      "find_unused_parameters": false,
      "broadcast_buffers": false,
      "gradient_as_bucket_view": true
    }
  },
  "logging": {
    "logging_steps": 50,
    "log_level": "info"
  },
  "huggingface_hub": {
    "push_to_hub": true,
    "hub_model_id": "phi-4-cognitive-assistant",
    "hub_private_repo": true
  },
  "model_name_or_path": "unsloth/phi-4-unsloth-bnb-4bit",
  "model_revision": "main",
  "use_flash_attention": true,
  "torch_dtype": "bfloat16",
  "bf16": true,
  "fp16": false,
  "hardware": {
    "hardware_name": "4xL4",
    "specs": {
      "gpu_count": 4,
      "gpu_type": "L4",
      "vram_per_gpu": 24,
      "total_vram": 96,
      "vcpu_count": 48,
      "ram": 186
    },
    "hardware_setup": {
      "use_cpu": false,
      "num_gpus": 4,
      "device_map": "auto"
    },
    "training_optimizations": {
      "per_device_batch_size": 16,
      "gradient_accumulation_steps": 3,
      "mixed_precision": "bf16",
      "torch_compile": false,
      "memory_optimizations": {
        "use_gradient_checkpointing": true,
        "use_flash_attention": true
      },
      "multi_gpu_strategy": "ddp"
    },
    "system_settings": {
      "cuda_memory_fraction": 0.75,
      "dataloader_num_workers": 4,
      "dataloader_pin_memory": true
    },
    "memory_breakdown": {
      "model_size": "~3.5GB (pre-quantized 4-bit)",
      "optimizer_states": "~1GB",
      "batch_memory_per_gpu": "~3GB",
      "peak_memory_estimate": "~18GB",
      "safe_headroom": "~6GB"
    },
    "compute_environment": "L4_CLOUD"
  },
  "dataset": {
    "dataset": {
      "name": "George-API/phi4-cognitive-dataset",
      "split": "train"
    },
    "data_formatting": {
      "chat_template": "phi",
      "conversation_structure": {
        "system_identifier": "[RESEARCH INTRODUCTION]",
        "turn_order": ["human", "assistant"]
      },
      "roles": {
        "system": "System: {content}\n\n",
        "human": "Human: {content}\n\n",
        "assistant": "Assistant: {content}\n\n"
      }
    },
    "data_loading": {
      "batch_size": 24,
      "shuffle": false,
      "sequential_processing": true,
      "drop_last": false,
      "num_workers": 4,
      "pin_memory": true,
      "prefetch_factor": 4
    },
    "validation": {
      "log_samples": 3,
      "log_interval": 50,
      "verify_sequence_integrity": true,
      "metrics": ["processed", "skipped", "avg_tokens", "unique_articles"]
    }
  }
}