hf-train-frontend / transformers_config.json
{
"model": {
"name": "unsloth/phi-4-unsloth-bnb-4bit",
"trust_remote_code": true,
"use_fast_tokenizer": true
},
"tokenizer": {
"chat_template": "phi",
"max_seq_length": 2048,
"padding_side": "right",
"add_eos_token": true
},
"training": {
"per_device_train_batch_size": 16,
"gradient_accumulation_steps": 3,
"learning_rate": 2e-5,
"num_train_epochs": 3,
"max_steps": -1,
"logging_steps": 10,
"save_steps": 200,
"save_total_limit": 5,
"push_to_hub": true,
"hub_strategy": "every_save",
"gradient_checkpointing": true,
"optim": "adamw_torch",
"lr_scheduler_type": "cosine",
"warmup_ratio": 0.05,
"weight_decay": 0.01,
"max_grad_norm": 1.0,
"neftune_noise_alpha": 5,
"fp16": false,
"bf16": true
},
"checkpointing": {
"output_dir": "./results",
"save_strategy": "steps",
"save_steps": 100,
"save_total_limit": 3,
"hub_strategy": "every_save"
},
"unsloth": {
"enabled": true,
"r": 32,
"alpha": 16,
"dropout": 0,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
]
},
"distributed_training": {
"fsdp_config": {
"enabled": false,
"sharding_strategy": "FULL_SHARD",
"mixed_precision": "BF16",
"activation_checkpointing": true,
"offload_params": false
},
"ddp_find_unused_parameters": false,
"dataloader_num_workers": 2,
"ddp_config": {
"enabled": true,
"backend": "nccl",
"find_unused_parameters": false,
"broadcast_buffers": false,
"gradient_as_bucket_view": true
}
},
"logging": {
"logging_steps": 50,
"log_level": "info"
},
"huggingface_hub": {
"push_to_hub": true,
"hub_model_id": "phi-4-cognitive-assistant",
"hub_private_repo": true
},
"model_name_or_path": "unsloth/phi-4-unsloth-bnb-4bit",
"model_revision": "main",
"use_flash_attention": true,
"torch_dtype": "bfloat16",
"bf16": true,
"fp16": false,
"hardware": {
"hardware_name": "4xL4",
"specs": {
"gpu_count": 4,
"gpu_type": "L4",
"vram_per_gpu": 24,
"total_vram": 96,
"vcpu_count": 48,
"ram": 186
},
"hardware_setup": {
"use_cpu": false,
"num_gpus": 4,
"device_map": "auto"
},
"training_optimizations": {
"per_device_batch_size": 16,
"gradient_accumulation_steps": 3,
"mixed_precision": "bf16",
"torch_compile": false,
"memory_optimizations": {
"use_gradient_checkpointing": true,
"use_flash_attention": true
},
"multi_gpu_strategy": "ddp"
},
"system_settings": {
"cuda_memory_fraction": 0.75,
"dataloader_num_workers": 4,
"dataloader_pin_memory": true
},
"memory_breakdown": {
"model_size": "~3.5GB (pre-quantized 4-bit)",
"optimizer_states": "~1GB",
"batch_memory_per_gpu": "~3GB",
"peak_memory_estimate": "~18GB",
"safe_headroom": "~6GB"
},
"compute_environment": "L4_CLOUD"
},
"dataset": {
"dataset": {
"name": "George-API/phi4-cognitive-dataset",
"split": "train"
},
"data_formatting": {
"chat_template": "phi",
"conversation_structure": {
"system_identifier": "[RESEARCH INTRODUCTION]",
"turn_order": ["human", "assistant"]
},
"roles": {
"system": "System: {content}\n\n",
"human": "Human: {content}\n\n",
"assistant": "Assistant: {content}\n\n"
}
},
"data_loading": {
"batch_size": 24,
"shuffle": false,
"sequential_processing": true,
"drop_last": false,
"num_workers": 4,
"pin_memory": true,
"prefetch_factor": 4
},
"validation": {
"log_samples": 3,
"log_interval": 50,
"verify_sequence_integrity": true,
"metrics": ["processed", "skipped", "avg_tokens", "unique_articles"]
}
}
}
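
How a training script might consume this file: the "training", "checkpointing", "logging", and "huggingface_hub" sections map almost one-to-one onto transformers.TrainingArguments. The sketch below is a minimal, hypothetical wiring, not the repo's actual train script; it also assumes the more specific "checkpointing" and "logging" sections win over the duplicated keys in "training" (save_steps 100 vs 200, save_total_limit 3 vs 5, logging_steps 50 vs 10), which the file itself does not state.

import json

from transformers import TrainingArguments

with open("transformers_config.json") as f:
    cfg = json.load(f)

train = cfg["training"]
ckpt = cfg["checkpointing"]
hub = cfg["huggingface_hub"]
dist_cfg = cfg["distributed_training"]

args = TrainingArguments(
    output_dir=ckpt["output_dir"],
    per_device_train_batch_size=train["per_device_train_batch_size"],  # 16
    gradient_accumulation_steps=train["gradient_accumulation_steps"],  # 3
    learning_rate=train["learning_rate"],
    num_train_epochs=train["num_train_epochs"],
    max_steps=train["max_steps"],                    # -1: run by epochs, not steps
    logging_steps=cfg["logging"]["logging_steps"],   # assumed to override training.logging_steps
    save_strategy=ckpt["save_strategy"],
    save_steps=ckpt["save_steps"],                   # assumed to override training.save_steps
    save_total_limit=ckpt["save_total_limit"],
    push_to_hub=hub["push_to_hub"],
    hub_model_id=hub["hub_model_id"],
    hub_private_repo=hub["hub_private_repo"],
    hub_strategy=ckpt["hub_strategy"],
    gradient_checkpointing=train["gradient_checkpointing"],
    optim=train["optim"],
    lr_scheduler_type=train["lr_scheduler_type"],
    warmup_ratio=train["warmup_ratio"],
    weight_decay=train["weight_decay"],
    max_grad_norm=train["max_grad_norm"],
    neftune_noise_alpha=train["neftune_noise_alpha"],  # NEFTune embedding noise
    bf16=train["bf16"],
    fp16=train["fp16"],
    ddp_find_unused_parameters=dist_cfg["ddp_find_unused_parameters"],
    dataloader_num_workers=dist_cfg["dataloader_num_workers"],
)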
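
The "unsloth" section carries the LoRA hyperparameters: rank 32, alpha 16, no dropout, adapters on all attention and MLP projections. A minimal sketch of how Unsloth's FastLanguageModel API would consume it, assuming the standard load-then-wrap flow; the script itself is illustrative:

import json

from unsloth import FastLanguageModel

with open("transformers_config.json") as f:
    cfg = json.load(f)
lora = cfg["unsloth"]

# The checkpoint is pre-quantized (bitsandbytes 4-bit), so load_in_4bit matches it.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=cfg["model"]["name"],                    # unsloth/phi-4-unsloth-bnb-4bit
    max_seq_length=cfg["tokenizer"]["max_seq_length"],  # 2048
    load_in_4bit=True,
)

model = FastLanguageModel.get_peft_model(
    model,
    r=lora["r"],                            # LoRA rank 32
    lora_alpha=lora["alpha"],               # scaling factor 16
    lora_dropout=lora["dropout"],           # 0 disables adapter dropout
    target_modules=lora["target_modules"],  # q/k/v/o + gate/up/down projections
    use_gradient_checkpointing=True,        # mirrors training.gradient_checkpointing
)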
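
"distributed_training" enables plain DDP over the four L4s (the FSDP block is present but disabled), so the effective batch size is 16 per device × 3 accumulation steps × 4 GPUs = 192 sequences per optimizer step. The ddp_config keys are constructor arguments of torch.nn.parallel.DistributedDataParallel; the HF Trainer applies them itself when launched under torchrun, but the mapping looks like this (stand-alone sketch with a placeholder model):

import os

import torch
import torch.distributed as dist
from torch import nn
from torch.nn.parallel import DistributedDataParallel as DDP

# torchrun sets RANK / WORLD_SIZE / LOCAL_RANK; "nccl" comes from ddp_config.backend.
dist.init_process_group(backend="nccl")
local_rank = int(os.environ["LOCAL_RANK"])
torch.cuda.set_device(local_rank)

model = nn.Linear(8, 8).cuda()  # placeholder for the LoRA-wrapped model above

ddp_model = DDP(
    model,
    device_ids=[local_rank],
    find_unused_parameters=False,  # ddp_config.find_unused_parameters
    broadcast_buffers=False,       # ddp_config.broadcast_buffers
    gradient_as_bucket_view=True,  # ddp_config.gradient_as_bucket_view
)
# Launch: torchrun --nproc_per_node=4 train.py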
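
Finally, "data_formatting" defines a plain-text chat layout: every turn renders as "Role: {content}" followed by a blank line, with the system turn first and human/assistant turns alternating, while "data_loading" reads George-API/phi4-cognitive-dataset sequentially (shuffle false, sequential_processing true). A minimal formatter under that reading; format_conversation and the sample messages are hypothetical:

ROLE_TEMPLATES = {
    "system": "System: {content}\n\n",
    "human": "Human: {content}\n\n",
    "assistant": "Assistant: {content}\n\n",
}

def format_conversation(messages):
    """Render [{"role": ..., "content": ...}, ...] with the config's templates."""
    return "".join(
        ROLE_TEMPLATES[m["role"]].format(content=m["content"]) for m in messages
    )

example = format_conversation([
    {"role": "system", "content": "[RESEARCH INTRODUCTION] ..."},
    {"role": "human", "content": "What does the study conclude?"},
    {"role": "assistant", "content": "..."},
])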