# gpt2-large-sft / config.yaml
model_name_or_path: "openai-community/gpt2-large"
dataset_name_or_path: "allenai/tulu-3-sft-olmo-2-mixture-0225"
project_name: "scaling-post-training"
training_args:
  seed: 42
  num_train_epochs: 1
  per_device_train_batch_size: 2
  per_device_eval_batch_size: 2
  gradient_accumulation_steps: 8 # effective train batch: 2 x 8 = 16 sequences per device
  warmup_ratio: 0.05
  weight_decay: 0.01
  logging_steps: 10
  eval_strategy: "steps"
  eval_steps: 50
  report_to: "wandb"
  fp16: true
  learning_rate: 3.0e-5
  lr_scheduler_type: "cosine"
  run_name: "gpt2-large-sft"
  output_dir: "models/gpt2-large/sft"
  save_strategy: "best" # keep only the checkpoint that is best by the metric below
  metric_for_best_model: "eval_loss"
  load_best_model_at_end: true
  save_total_limit: 1
  hub_model_id: "gpt2-large-sft"
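
# The training script that consumes this file is not part of the repo page; the
# loader below is a minimal sketch, assuming trl's SFTTrainer and a hypothetical
# 1% held-out eval split (some eval data is needed because eval_strategy and
# load_best_model_at_end expect an eval dataset):
#
#   import yaml
#   from datasets import load_dataset
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#   from trl import SFTConfig, SFTTrainer
#
#   with open("config.yaml") as f:
#       cfg = yaml.safe_load(f)
#
#   model = AutoModelForCausalLM.from_pretrained(cfg["model_name_or_path"])
#   tokenizer = AutoTokenizer.from_pretrained(cfg["model_name_or_path"])
#   tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships without a pad token
#
#   data = load_dataset(cfg["dataset_name_or_path"], split="train")
#   data = data.train_test_split(test_size=0.01, seed=cfg["training_args"]["seed"])
#
#   trainer = SFTTrainer(
#       model=model,
#       args=SFTConfig(**cfg["training_args"]),  # SFTConfig subclasses TrainingArguments
#       train_dataset=data["train"],
#       eval_dataset=data["test"],
#       processing_class=tokenizer,
#   )
#   trainer.train()
#   trainer.push_to_hub()  # uploads the best checkpoint to hub_model_id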