{
  "model": {
    "name": "unsloth/phi-4-unsloth-bnb-4bit",
    "trust_remote_code": true,
    "use_fast_tokenizer": true
  },
  "tokenizer": {
    "chat_template": "phi",
    "max_seq_length": 2048,
    "padding_side": "right",
    "add_eos_token": true
  },
  "training": {
    "per_device_train_batch_size": 24,
    "gradient_accumulation_steps": 2,
    "learning_rate": 2e-5,
    "num_train_epochs": 3,
    "max_steps": -1,
    "logging_steps": 10,
    "save_steps": 200,
    "save_total_limit": 5,
    "push_to_hub": true,
    "hub_strategy": "every_save",
    "gradient_checkpointing": true,
    "optim": "adamw_torch",
    "lr_scheduler_type": "cosine",
    "warmup_ratio": 0.05,
    "weight_decay": 0.01,
    "max_grad_norm": 1.0,
    "neftune_noise_alpha": 5
  },
  "checkpointing": {
    "output_dir": "./results",
    "save_strategy": "steps",
    "save_steps": 100,
    "save_total_limit": 3,
    "hub_strategy": "every_save"
  },
  "unsloth": {
    "enabled": true,
    "r": 32,
    "alpha": 16,
    "dropout": 0.05,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ]
  },
  "distributed_training": {
    "fsdp_config": {
      "enabled": true,
      "sharding_strategy": "FULL_SHARD",
      "mixed_precision": "BF16",
      "activation_checkpointing": true,
      "offload_params": false
    },
    "ddp_find_unused_parameters": false,
    "dataloader_num_workers": 4
  },
  "logging": {
    "logging_steps": 50,
    "log_level": "info"
  },
  "huggingface_hub": {
    "push_to_hub": true,
    "hub_model_id": "phi-4-research-assistant",
    "hub_private_repo": true
  },
  "model_name_or_path": "unsloth/phi-4-unsloth-bnb-4bit",
  "model_revision": "main",
  "use_flash_attention": true,
  "torch_dtype": "bfloat16",
  "bf16": true
}