# Spaces:
# Running
# Running
# Dataset, prompt-formatting and batching options.
data:
  train_files: hiyouga/math12k@train
  val_files: hiyouga/math12k@test
  prompt_key: problem
  answer_key: answer
  image_key: images
  max_prompt_length: 2048
  max_response_length: 2048
  rollout_batch_size: 512
  val_batch_size: 1024
  format_prompt: ./examples/format_prompt/math_format.jinja
  override_chat_template: null
  shuffle: true
  seed: 1
  max_pixels: 4194304
  min_pixels: 262144
  filter_overlong_prompts: true
# Advantage-estimator and KL-regularisation options.
algorithm:
  adv_estimator: grpo
  disable_kl: false
  use_kl_loss: true
  kl_penalty: low_var_kl
  kl_coef: 1.0e-2
  # NOTE(review): kept under `algorithm` because it follows kl_coef in the
  # source; confirm this key is not meant to live in another section.
  mock_data: test
# Per-role worker settings: actor, rollout, ref and reward.
# NOTE(review): the nesting below was reconstructed from key order; confirm
# each sub-map (model/optim/fsdp/offload) sits under the intended role.
worker:
  actor:
    global_batch_size: 128
    micro_batch_size_per_device_for_update: 2
    micro_batch_size_per_device_for_experience: 8
    max_grad_norm: 1.0
    padding_free: true
    ulysses_sequence_parallel_size: 1
    model:
      model_path: Qwen/Qwen2.5-7B-Instruct
      enable_gradient_checkpointing: true
      trust_remote_code: false
      freeze_vision_tower: false
    optim:
      lr: 1.0e-6
      weight_decay: 1.0e-2
      strategy: adamw  # {adamw, adamw_bf16}
      lr_warmup_ratio: 0.0
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: true  # true: more CPU memory; false: more GPU memory
      offload_optimizer: true  # true: more CPU memory; false: more GPU memory

  rollout:
    n: 5
    temperature: 1.0
    top_p: 0.99
    gpu_memory_utilization: 0.7
    enforce_eager: false
    enable_chunked_prefill: false
    tensor_parallel_size: 2
    limit_images: 0
    # Values here replace the rollout settings of the same name during validation
    # (per the key name; confirm against the consumer).
    val_override_config:
      temperature: 1.0
      n: 1

  ref:
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: true  # true: more CPU memory; false: more GPU memory
      enable_rank0_init: true
    offload:
      offload_params: true

  reward:
    reward_type: batch
    reward_function: ./examples/reward_function/math.py:compute_score
# Run-level options: schedule, logging, cluster size, validation and checkpoints.
trainer:
  total_epochs: 2
  max_steps: null
  project_name: easy_r1
  experiment_name: qwen2_5_7b_math_grpo
  logger: ["console", "wandb"]
  nnodes: 1
  n_gpus_per_node: 8
  val_freq: 3  # -1 to disable
  val_before_train: true
  val_only: false
  val_generations_to_log: 3
  save_freq: 5  # -1 to disable
  save_limit: 3  # -1 to disable
  save_checkpoint_path: your_checkpoint_path
  load_checkpoint_path: null