mkurman
/

Llama-3.2-MedIT-3B-R1

Model card Files Files and versions Community

Llama-3.2-MedIT-3B-R1 / config.yaml

mkurman's picture

v-1.0.0

49f50bc verified about 2 months ago

history blame contribute delete

593 Bytes

	# Flat key/value run configuration. The script that consumes these keys is
	# not visible here, so every statement below about what a key *does* is an
	# inference from its name and should be confirmed against the loader.
	# NOTE(review): each line in this view starts with a tab; if that tab is
	# really in the file (not a copy/paste artefact), strict YAML parsers may
	# reject it -- confirm the file loads as-is.
	#
	# --- Endpoint and model identifiers ---
	# OpenAI-style base URL pointing at OpenRouter's v1 API. No API key appears
	# among the visible keys; presumably it is supplied elsewhere -- verify.
	openai_base_url: "https://openrouter.ai/api/v1/"
	# presumably the model being trained (Hugging Face-style repo id) -- TODO confirm
	student_model_name: "meta-llama/Llama-3.2-3B-Instruct"
	# presumably the model queried through openai_base_url (OpenRouter-style id) -- TODO confirm
	teacher_model_name: "deepseek/deepseek-r1-distill-qwen-14b"
	# presumably a Hugging Face Hub dataset id -- TODO confirm
	dataset_name: "open-r1/OpenR1-Math-220k"
	#
	# --- Output / checkpointing ---
	# Relative path: resolves against whatever working directory the consumer uses.
	output_dir: "results/llama/"
	# presumably a checkpoint interval measured in optimizer steps -- TODO confirm
	save_steps: 50
	#
	# --- Optimisation and generation settings ---
	learning_rate: 5.0e-07
	# NOTE(review): 3072 here vs. max_seq_length 4096 below; if max_seq_length
	# bounds prompt + completion, only 1024 tokens remain for the prompt -- confirm.
	max_new_tokens: 3072
	# NOTE(review): equal to max_seq_length (4096); confirm how the two interact.
	max_feedback_new_tokens: 4096
	# presumably the number of completions sampled per prompt -- TODO confirm
	num_return_sequences: 4
	seed: 12345
	# Sampling parameters (temperature / nucleus / top-k by name); which of the
	# two models they apply to cannot be determined from this file.
	temperature: 0.7
	top_p: 0.9
	top_k: 50
	max_seq_length: 4096
	# YAML does not expand "~"; the consumer must expand it (e.g. expanduser),
	# otherwise a literal "~" directory may be created -- verify against loader.
	cache_dir: "~/.cache"
	#
	# --- Schedule ---
	# warmup_steps is 10% of total_steps (100 of 1000).
	warmup_steps: 100
	total_steps: 1000
	# presumably a gradient-norm clipping threshold -- TODO confirm
	max_grad_norm: 0.1
	#
	# --- Loss / reward coefficients ---
	# NOTE(review): names suggest a GRPO term and an SFT term with separate
	# coefficients; exact role (KL weight vs. loss mixing weight) is not visible.
	grpo_beta: 0.05
	sft_beta: 0.05
	# The three *_weight values below sum to 0.20, not 1.0.
	# NOTE(review): confirm whether the consumer normalises them or uses them raw.
	thought_process_weight: 0.07
	answer_weight: 0.1
	format_weight: 0.03