{
  "model": {
    "name": "unsloth/phi-4-unsloth-bnb-4bit",
    "trust_remote_code": true,
    "use_fast_tokenizer": true
  },
  "tokenizer": {
    "chat_template": "phi",
    "max_seq_length": 2048,
    "padding_side": "right",
    "add_eos_token": true
  },
  "training": {
    "per_device_train_batch_size": 16,
    "gradient_accumulation_steps": 4,
    "learning_rate": 2e-5,
    "num_train_epochs": 3,
    "max_steps": -1,
    "logging_steps": 10,
    "save_steps": 200,
    "save_total_limit": 5,
    "push_to_hub": true,
    "hub_strategy": "every_save",
    "gradient_checkpointing": true,
    "optim": "adamw_torch",
    "lr_scheduler_type": "cosine",
    "warmup_ratio": 0.03,
    "weight_decay": 0.01,
    "max_grad_norm": 1.0,
    "neftune_noise_alpha": 5
  },
  "checkpointing": {
    "output_dir": "./results",
    "save_strategy": "steps",
    "save_steps": 100,
    "save_total_limit": 3,
    "hub_strategy": "every_save"
  },
  "unsloth": {
    "enabled": true,
    "r": 32,
    "alpha": 16,
    "dropout": 0.05,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ]
  },
  "logging": {
    "logging_steps": 50,
    "log_level": "info"
  },
  "huggingface_hub": {
    "push_to_hub": true,
    "hub_model_id": "phi-4-research-assistant",
    "hub_private_repo": true
  },
  "model_name_or_path": "unsloth/phi-4-unsloth-bnb-4bit",
  "model_revision": "main",
  "use_flash_attention": true,
  "torch_dtype": "bfloat16",
  "bf16": true
}
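
Since JSON carries no inline documentation, here is a minimal sketch of how a training script might consume this file, assuming a transformers-plus-Unsloth stack. The file path, variable names, and precedence choices (e.g. letting the "checkpointing" section win over the duplicated save_steps/hub_strategy keys in "training") are illustrative assumptions, not code from this repo; only the TrainingArguments and FastLanguageModel parameters shown are standard library API.

import json

import torch
from transformers import TrainingArguments
from unsloth import FastLanguageModel

# Load the config shown above (the path is an assumption).
with open("transformers_config.json") as f:
    cfg = json.load(f)

# Base model: a pre-quantized 4-bit Phi-4 checkpoint, hence load_in_4bit=True.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=cfg["model"]["name"],
    max_seq_length=cfg["tokenizer"]["max_seq_length"],
    dtype=torch.bfloat16,  # cfg["torch_dtype"]
    load_in_4bit=True,
    trust_remote_code=cfg["model"]["trust_remote_code"],
)

# Attach LoRA adapters per the "unsloth" section (r=32, alpha=16, dropout=0.05).
model = FastLanguageModel.get_peft_model(
    model,
    r=cfg["unsloth"]["r"],
    lora_alpha=cfg["unsloth"]["alpha"],
    lora_dropout=cfg["unsloth"]["dropout"],
    target_modules=cfg["unsloth"]["target_modules"],
    use_gradient_checkpointing=cfg["training"]["gradient_checkpointing"],
)

train = cfg["training"]
ckpt = cfg["checkpointing"]

# "training" and "checkpointing" both define save_steps (200 vs. 100) and
# hub_strategy, and logging_steps appears as 10 and 50; this sketch
# arbitrarily prefers the more specific "checkpointing" and "logging" sections.
args = TrainingArguments(
    output_dir=ckpt["output_dir"],
    per_device_train_batch_size=train["per_device_train_batch_size"],
    gradient_accumulation_steps=train["gradient_accumulation_steps"],
    learning_rate=train["learning_rate"],
    num_train_epochs=train["num_train_epochs"],
    max_steps=train["max_steps"],
    logging_steps=cfg["logging"]["logging_steps"],
    save_strategy=ckpt["save_strategy"],
    save_steps=ckpt["save_steps"],
    save_total_limit=ckpt["save_total_limit"],
    gradient_checkpointing=train["gradient_checkpointing"],
    optim=train["optim"],
    lr_scheduler_type=train["lr_scheduler_type"],
    warmup_ratio=train["warmup_ratio"],
    weight_decay=train["weight_decay"],
    max_grad_norm=train["max_grad_norm"],
    neftune_noise_alpha=train["neftune_noise_alpha"],
    bf16=cfg["bf16"],
    push_to_hub=cfg["huggingface_hub"]["push_to_hub"],
    hub_model_id=cfg["huggingface_hub"]["hub_model_id"],
    hub_private_repo=cfg["huggingface_hub"]["hub_private_repo"],
    hub_strategy=ckpt["hub_strategy"],
)

From here, a standard transformers Trainer (or trl's SFTTrainer) would take model, tokenizer, and args; which trainer this repo actually uses is not determined by the config alone.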