---
# Flat run configuration: API endpoint, model/dataset identifiers, output
# settings, generation parameters, and optimisation/loss hyperparameters.
# All keys live in a single top-level mapping.

# Endpoint and model/dataset identifiers.
# NOTE(review): the base URL points at OpenRouter; presumably it is passed to
# an OpenAI-compatible client -- confirm against the consumer.
openai_base_url: "https://openrouter.ai/api/v1/"
student_model_name: "meta-llama/Llama-3.2-3B-Instruct"
teacher_model_name: "deepseek/deepseek-r1-distill-qwen-14b"
dataset_name: "open-r1/OpenR1-Math-220k"

# Output location and save cadence.
# NOTE(review): save_steps presumably counts training steps -- confirm.
output_dir: "results/llama/"
save_steps: 50

# Written with a decimal point and a signed exponent so that YAML 1.1 loaders
# also resolve it as a float rather than a string.
learning_rate: 5.0e-07

# Generation / sampling settings.
# NOTE(review): max_feedback_new_tokens presumably applies to the teacher's
# feedback generation -- confirm against the consumer.
max_new_tokens: 3072
max_feedback_new_tokens: 4096
num_return_sequences: 4
seed: 12345
temperature: 0.7
top_p: 0.9
top_k: 50
max_seq_length: 4096

# YAML does not expand "~"; the consumer must expand it to the home directory.
cache_dir: "~/.cache"

# Schedule and gradient clipping.
warmup_steps: 100
total_steps: 1000
max_grad_norm: 0.1

# Loss coefficients.
# NOTE(review): presumably the weights of the GRPO and SFT terms -- confirm.
grpo_beta: 0.05
sft_beta: 0.05

# Component weights.
# NOTE(review): presumably per-component reward weights (they sum to 0.2, not
# 1.0) -- confirm whether the consumer normalises them.
thought_process_weight: 0.07
answer_weight: 0.1
format_weight: 0.03