---
# Training run configuration.
# The script that consumes this file is not visible here; the group labels
# below are inferred from the key names only -- TODO confirm against the loader.

# API endpoint, model identifiers, and dataset.
openai_base_url: "https://openrouter.ai/api/v1/"
student_model_name: "meta-llama/Llama-3.2-3B-Instruct"
teacher_model_name: "deepseek/deepseek-r1-distill-qwen-14b"
dataset_name: "open-r1/OpenR1-Math-220k"

# Output location and save interval.
output_dir: "results/llama/"
save_steps: 50

# Keep both the decimal point and the signed exponent: YAML 1.1 loaders such
# as PyYAML only resolve this form as a float (a bare `5e-7` loads as a string).
learning_rate: 5.0e-07

# Generation lengths and sampling settings.
max_new_tokens: 3072
max_feedback_new_tokens: 4096
num_return_sequences: 4
seed: 12345
temperature: 0.7
top_p: 0.9
top_k: 50
max_seq_length: 4096

# NOTE(review): YAML does not expand `~`; verify the consumer expands the
# home directory before using this path.
cache_dir: "~/.cache"

# Step counts and gradient-norm limit.
warmup_steps: 100
total_steps: 1000
max_grad_norm: 0.1

# Presumably coefficients for the GRPO and SFT loss terms -- TODO confirm.
grpo_beta: 0.05
sft_beta: 0.05

# Presumably per-component reward weights -- TODO confirm how they combine.
thought_process_weight: 0.07
answer_weight: 0.1
format_weight: 0.03