terencezhang2024
/

llama4-scout-lora-alpaca

instruction-tuning

Model card Files Files and versions

llama4-scout-lora-alpaca / torchtune_config.yaml

terencezhang2024's picture

terencezhang2024

Upload folder using huggingface_hub

f318415 verified 6 months ago

history blame contribute delete

1.82 kB

	output_dir: /tmp/torchtune/llama4_17Bx16E/lora
	model:
	_component_: torchtune.models.llama4.lora_llama4_scout_17b_16e
	decoder_trainable: lora
	encoder_trainable: frozen
	fusion_trainable: lora
	lora_attn_modules:
	- q_proj
	- v_proj
	- output_proj
	apply_lora_to_mlp: true
	apply_lora_to_output: false
	lora_rank: 16
	lora_alpha: 32
	lora_dropout: 0.0
	tokenizer:
	_component_: torchtune.models.llama4.llama4_transform
	path: /dev/shm/model_cache/Llama-4-Scout-17B-16E-Instruct/tokenizer.model
	max_seq_len: 2048
	max_num_tiles: 16
	checkpointer:
	_component_: torchtune.training.FullModelHFCheckpointer
	checkpoint_dir: /dev/shm/model_cache/Llama-4-Scout-17B-16E-Instruct
	checkpoint_files:
	filename_format: model-{}-of-{}.safetensors
	max_filename: '00050'
	recipe_checkpoint: null
	output_dir: ${output_dir}
	model_type: LLAMA4
	save_adapter_weights_only: true
	resume_from_checkpoint: false
	dataset:
	_component_: torchtune.datasets.alpaca_dataset
	packed: true
	seed: null
	shuffle: true
	epochs: 1
	max_steps_per_epoch: null
	batch_size: 4
	gradient_accumulation_steps: 1
	optimizer:
	_component_: torch.optim.AdamW
	lr: 2.0e-05
	fused: false
	optimizer_in_bwd: false
	lr_scheduler:
	_component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
	num_warmup_steps: 100
	loss:
	_component_: torchtune.modules.loss.LinearCrossEntropyLoss
	clip_grad_norm: null
	device: cuda
	enable_activation_checkpointing: true
	enable_activation_offloading: false
	custom_sharded_layers:
	- tok_embeddings
	fsdp_cpu_offload: false
	compile: false
	dtype: bf16
	metric_logger:
	_component_: torchtune.training.metric_logging.DiskLogger
	log_dir: ${output_dir}/logs
	log_every_n_steps: 1
	log_peak_memory_stats: true
	profiler:
	_component_: torchtune.training.setup_torch_profiler
	enabled: false