Marin-8B-DPO-stage0 / easydel-training-arguments.json

Upload folder using huggingface_hub

553d6b3 verified 2 months ago

3.16 kB

	{
	"_can_log_metrics": null,
	"auto_shard_states": true,
	"aux_loss_enabled": false,
	"backend": null,
	"beta": 0.5,
	"clip_grad": 1.0,
	"custom_scheduler": null,
	"dataloader_num_workers": 0,
	"dataloader_pin_memory": false,
	"dataset_num_proc": null,
	"disable_dropout": true,
	"do_eval": true,
	"do_last_save": true,
	"do_train": true,
	"eval_batch_size": 4,
	"evaluation_steps": null,
	"extra_optimizer_kwargs": {},
	"force_use_ref_model": false,
	"frozen_parameters": null,
	"gradient_accumulation_steps": 1,
	"ids_to_pop_from_dataset": [],
	"init_tx": true,
	"is_encoder_decoder": null,
	"is_fine_tuning": true,
	"jax_distributed_config": null,
	"label_pad_token_id": -100,
	"label_smoothing": 0.0,
	"learning_rate": 1e-07,
	"learning_rate_end": 5e-07,
	"log_all_workers": false,
	"log_grad_norms": true,
	"log_steps": 5,
	"loss_config": {
	"break_on_nan": true,
	"classification_problem_type": null,
	"divide_weight_sum": false,
	"ignore_index": -100,
	"label_smoothing": 0.0,
	"loss_normalizing_factor": "SpecialLossNormalizingFactor.NO_WEIGHT_NUM_REAL_TARGET_TOKENS",
	"num_classification_labels": null,
	"num_labels": null,
	"problem_type": null,
	"reduction": null,
	"shift_tokens": true,
	"z_loss": 0.0
	},
	"loss_type": "sigmoid",
	"low_mem_usage": true,
	"max_completion_length": 2048,
	"max_evaluation_steps": null,
	"max_length": 4096,
	"max_prompt_length": 2048,
	"max_sequence_length": 8192,
	"max_training_steps": null,
	"metrics_to_show_in_rich_pbar": null,
	"model_name": "marin-8b-instruct-dpo",
	"model_parameters": null,
	"num_train_epochs": 4,
	"offload_dataset": false,
	"offload_device_index": 0,
	"offload_device_type": "cpu",
	"optimizer": "adamw",
	"padding_value": 128009,
	"per_epoch_evaluation_steps": null,
	"per_epoch_training_steps": null,
	"performance_mode": false,
	"precompute_ref_log_probs": false,
	"process_zero_is_admin": true,
	"progress_bar_type": "json",
	"pruning_module": null,
	"ref_model_mixup_alpha": 0.9,
	"ref_model_sync_steps": 64,
	"reference_free": false,
	"remove_ckpt_after_load": false,
	"remove_unused_columns": true,
	"report_metrics": true,
	"report_steps": 10,
	"rpo_alpha": null,
	"save_directory": "EasyDeL-Checkpoints",
	"save_optimizer_state": false,
	"save_steps": 1000,
	"save_total_limit": 5,
	"scheduler": "linear",
	"shuffle_train_dataset": true,
	"sparse_module_type": "bcoo",
	"sparsify_module": false,
	"state_apply_fn_kwarguments_to_model": null,
	"step_partition_spec": [
	[
	"dp",
	"fsdp"
	],
	"sp"
	],
	"step_start_point": 0,
	"sync_ref_model": false,
	"tools": null,
	"total_batch_size": 4,
	"track_memory": false,
	"train_on_inputs": true,
	"trainer_config_class": "DPOConfig",
	"training_time_limit": null,
	"truncation_mode": "keep_end",
	"tx_mu_dtype": null,
	"use_data_collactor": true,
	"use_wandb": true,
	"use_weighting": false,
	"verbose": true,
	"wandb_entity": "erfanzar",
	"wandb_name": null,
	"warmup_steps": 0,
	"weight_decay": 0.01,
	"weight_distribution_log_steps": 100,
	"weight_distribution_pattern": ".*"
	}