{
    "rl_model_name": null,
    "rl_model_class": null,
    "num_rollouts": 64,
    "chunk_size": 16,
    "ppo_epochs": 4,
    "init_kl_coef": 0.05,
    "num_layers_unfrozen": 2,
    "rm_bits": 8,
    "rl_lora_rank": 8,
    "rl_lora_alpha": 32,
    "rl_lora_dropout": 0.1,
    "use_qlora_in_rl": false,
    "use_rl_peft_config": true,
    "lora_compute_dtype": "bfloat16",
    "steps_per_print": 10,
    "logdir": "aim-repo",
    "aim_repo": null,
    "experiment_name": "granite_3.2_faithfulness_balanced_response_id",
    "stage": 2,
    "overlap_comm": false,
    "contiguous_gradients": false,
    "cpu_offload": false,
    "optimizer": {
        "optimizer_class": "FusedAdam",
        "lr": 1e-05,
        "weight_decay": 0.1,
        "betas": [
            0.9,
            0.95
        ],
        "eps": 1e-10
    },
    "lr_schedule": "linear",
    "warmup_steps": 200,
    "datasets": [
        {
            "data_class": "JSONLinesDatasetRAGChat",
            "data_name": "simulator-citations-chat",
            "data_path": "data/faithfulness/full_response_balanced_response_id",
            "data_sampling_proportion": 1,
            "max_input_tokens": 5000,
            "max_output_tokens": 500
        }
    ],
    "seed": 42,
    "training_inference_type": "lora_finetuning",
    "prompt_tuning_init": null,
    "prompt_tuning_init_text": null,
    "num_virtual_tokens": null,
    "load_path": null,
    "peft_num_layers": null,
    "peft_num_attention_heads": null,
    "model_name": "ibm-granite/granite-3.2-8b-instruct",
    "tokenizer_name": null,
    "model_class": "AutoModelForCausalLM",
    "gma_model_class": "Model",
    "dtype": "bfloat16",
    "trust_remote_code": false,
    "padding_side": null,
    "lora_rank": 8,
    "lora_alpha": 8,
    "lora_dropout": 0.1,
    "lora_target_modules": [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "up_proj",
        "down_proj",
        "gate_proj"
    ],
    "save_huggingface_checkpoint": true,
    "quantization_method": "fp4",
    "bnb_4bit_use_double_quant": false,
    "use_quantization_for_inference": false,
    "max_seq_len": 2048,
    "attention_implementation": "flash_attention_2",
    "num_labels": 1,
    "use_sdpa_attention": false,
    "save_path": "checkpoints/granite_3.2_faithfulness_balanced_response_id",
    "ignore_sampling_proportion_for_validation": false,
    "num_training_steps": 200000,
    "gradient_accumulation_steps": 1,
    "eval_interval": 20000,
    "save_interval": 20000,
    "batch_size_per_gpu": 1,
    "coeff": 1.0,
    "eval_during_training": true,
    "smart_token_allocation": false,
    "max_new_tokens": 0,
    "gradient_checkpointing": true
}