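# NOTE(review): the next three lines appear to be web-page residue captured
# with the file (avatar alt text, commit message, short commit hash); they are
# not configuration and are kept only as comments.
# RyanYr's picture
# Save model at global step 5
# 285ac7f verified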
# Dataset files, prompt/response length limits and batch sizes.
# Nesting restored: in the flattened form `path`/`name` of `custom_cls` sat at
# the same level as every other key in the file and collided with later
# `path`/`name` keys.
data:
  tokenizer: null
  train_files: ace-numina_ds_train_sample.parquet
  val_files: matheval.parquet
  prompt_key: prompt
  reward_fn_key: data_source
  # 768 + 13312 = 14080, which equals `max_num_batched_tokens` in the rollout
  # settings of this file.
  max_prompt_length: 768
  max_response_length: 13312
  train_batch_size: 1024
  val_batch_size: 640
  return_raw_input_ids: false
  return_raw_chat: false
  shuffle: true
  filter_overlong_prompts: false
  filter_overlong_prompts_workers: 64
  filter_overlong_responses: true
  truncation: left
  image_key: images
  custom_cls:
    path: null
    name: null
# Settings for the policy model and its three roles: `actor` (training),
# `ref` (reference model) and `rollout` (generation).
# Nesting restored from a flattened dump. Keys repeated across sub-sections
# (`strategy`, `shuffle`, `fsdp_config`, `top_k`, `'n'`, ...) only parse
# without duplicate-key collisions when each sub-section is its own mapping.
# NOTE(review): the layout follows the verl `ppo_trainer.yaml` shape these
# keys appear to come from; placements marked below could not be recovered
# from the flattened text and must be confirmed against the consuming code.
actor_rollout_ref:
  hybrid_engine: true
  model:
    path: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
    external_lib: null
    override_config: {}
    enable_gradient_checkpointing: true
    use_remove_padding: false
    use_liger: false
    save_hf_repo_id: RyanYr/brm-ace-numina-ace-numina-r1qwen1.5B-base-lr2.5e-6-bfsz1024-mbs2048-beta0.002
    tokenizer_chat_template: deepseek-r1-think
  actor:
    brm:
      norm_factor: value
      value_constant: null
    # NOTE(review): `buffer` is placed as a sibling of `brm`; it may instead
    # belong inside `brm` — confirm.
    buffer:
      buffer_type: offline_dataset_buffer
      offline_dataset_buffer:
        train_files: ace-numina_ds_offline_train_sample.parquet
        response_key: response
        response_truncation: right
        # Must be nested below `actor`: `actor` has its own `shuffle` key.
        shuffle: true
      # NOTE(review): `update_size` is placed under `buffer`; it may instead
      # belong to `offline_dataset_buffer` or directly to `actor` — confirm.
      update_size: 1024
    strategy: fsdp
    # NOTE(review): 2048 exceeds `data.train_batch_size` (1024); presumably
    # the offline buffer supplies the remainder — confirm.
    ppo_mini_batch_size: 2048
    ppo_micro_batch_size: null
    ppo_micro_batch_size_per_gpu: 2
    use_dynamic_bsz: false
    ppo_max_token_len_per_gpu: 16384
    grad_clip: 1.0
    use_torch_compile: true
    ppo_epochs: 1
    shuffle: false
    ulysses_sequence_parallel_size: 1
    checkpoint:
      contents:
        - model
        - optimizer
        - extra
    optim:
      lr: 2.5e-06
      lr_warmup_steps: -1
      lr_warmup_steps_ratio: 0
      min_lr_ratio: null
      warmup_style: constant
      # Same value as `trainer.total_training_steps` in this file.
      total_training_steps: 100
      weight_decay: 0.01
    fsdp_config:
      wrap_policy:
        min_num_params: 0
      param_offload: false
      optimizer_offload: false
      fsdp_size: -1
    # NOTE(review): `report_entropy` is placed at actor level; it may instead
    # belong to `fsdp_config` — confirm.
    report_entropy: false
  ref:
    ref_model_path: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
    strategy: fsdp
    fsdp_config:
      param_offload: false
      wrap_policy:
        min_num_params: 0
    log_prob_micro_batch_size: null
    log_prob_micro_batch_size_per_gpu: 4
    log_prob_use_dynamic_bsz: false
    log_prob_max_token_len_per_gpu: 16384
    ulysses_sequence_parallel_size: 1
  rollout:
    name: vllm
    temperature: 1.0
    top_k: -1
    top_p: 1
    use_fire_sampling: false
    # Same values as `data.max_prompt_length` / `data.max_response_length`.
    prompt_length: 768
    response_length: 13312
    dtype: bfloat16
    gpu_memory_utilization: 0.8
    ignore_eos: false
    enforce_eager: false
    free_cache_engine: false
    load_format: dummy_dtensor
    tensor_model_parallel_size: 4
    # 14080 = prompt_length (768) + response_length (13312).
    max_num_batched_tokens: 14080
    max_model_len: null
    max_num_seqs: 1024
    log_prob_micro_batch_size: null
    log_prob_micro_batch_size_per_gpu: 2
    log_prob_use_dynamic_bsz: false
    log_prob_max_token_len_per_gpu: 16384
    disable_log_stats: true
    enable_chunked_prefill: true
    do_sample: true
    # Quoted because a bare `n` is read as a boolean by YAML 1.1 parsers.
    'n': 1
    engine_kwargs:
      swap_space: null
    # Sampling values that repeat the rollout-level keys above, so they must
    # live in their own mapping.
    val_kwargs:
      top_k: -1
      top_p: 1.0
      temperature: 0.6
      'n': 32
      do_sample: true
# Reward-model settings. `enable` is false, so the model settings below are
# presumably unused in this run — confirm against the trainer.
# Nesting restored: `strategy`, `path`, `fsdp_config` and others repeat keys
# from other sections and collide when every line sits at column 0.
reward_model:
  enable: false
  strategy: fsdp
  model:
    input_tokenizer: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
    path: ~/models/FsfairX-LLaMA3-RM-v0.1
    external_lib: null
    use_remove_padding: false
    # NOTE(review): `fsdp_config` is placed under `model`, following the verl
    # `ppo_trainer.yaml` layout these keys appear to come from — confirm.
    fsdp_config:
      wrap_policy:
        min_num_params: 0
      param_offload: false
      fsdp_size: -1
  micro_batch_size: null
  micro_batch_size_per_gpu: null
  max_length: null
  ulysses_sequence_parallel_size: 1
  use_dynamic_bsz: false
  forward_max_token_len_per_gpu: 16384
  reward_manager: prime
  # NOTE(review): `reward_kwargs` is placed under `reward_model` because it
  # directly follows `reward_manager`; it may be a top-level section — confirm.
  reward_kwargs:
    format_reward: 0.0
    format_type: null
# Optional user-supplied reward function: `path` is null here and `name` is
# the identifier `compute_score`.
# Nesting restored so `path`/`name` no longer collide with the same keys in
# other sections.
custom_reward_function:
  path: null
  name: compute_score
# Run-level settings: step/epoch counts, logging backends, cluster shape,
# checkpoint cadence and resume behaviour.
# Nesting restored: the `logger` sequence and the `resume_from_hf` children
# were detached, and `hf_token` appeared twice at the same level.
trainer:
  balance_batch: true
  total_epochs: 100
  total_training_steps: 100
  project_name: value-LLM
  # NOTE(review): this name uses `base_lr...` while
  # `actor_rollout_ref.model.save_hf_repo_id` uses `base-lr...` — confirm the
  # difference is intentional.
  experiment_name: brm-ace-numina-ace-numina-r1qwen1.5B-base_lr2.5e-6-bfsz1024-mbs2048-beta0.002
  logger:
    - console
    - wandb
  log_val_generations: 0
  nnodes: 1
  n_gpus_per_node: 8
  save_freq: 5
  resume_mode: auto
  resume_from_path: null
  val_before_train: false
  test_freq: -1
  default_hdfs_dir: null
  del_local_ckpt_after_load: false
  default_local_dir: ./BRM
  max_actor_ckpt_to_keep: 1
  ray_wait_register_center_timeout: 300
  hf_token: null
  resume_from_hf:
    enable: false
    hf_repo_id: null
    # Must be nested: `trainer` already has its own `hf_token` key above.
    hf_token: null
    # NOTE(review): `revision` is assumed to belong to `resume_from_hf` —
    # confirm.
    revision: main
# Algorithm settings; only the KL coefficient is set in this file.
# 0.002 matches the `beta0.002` suffix in the experiment name.
# Nesting restored: each value-less key must own the line that follows it.
algorithm:
  kl_ctrl:
    kl_coef: 0.002