lblaoke's picture
Update README.md
4c53293 verified
metadata
# Card metadata: one dataset identifier and one base-model identifier,
# each given as a single-item list.
# NOTE(review): these look like Hugging Face model-card metadata fields
# (`datasets`, `base_model`) — confirm against the consuming tool.
datasets:
  - Skywork/Skywork-Reward-Preference-80K-v0.1
base_model:
  - turboderp/Qwama-0.5B-Instruct

# Training hyperparameters, one top-level key per stanza.
# NOTE(review): the tool that consumes these keys is not visible here; the
# per-key notes below are inferred from the key names — confirm before relying
# on them.
# Presumably the LoRA adapter rank — confirm.
lora_rank: 32

# Presumably the beta coefficient of a preference-optimization loss — confirm.
pref_beta: 0.1

# Presumably the maximum sequence length; units (tokens?) to be confirmed.
cutoff_len: 2048

per_device_train_batch_size: 2

# Assuming standard gradient accumulation, 2 samples x 8 steps gives
# 16 samples per device per optimizer step — confirm.
gradient_accumulation_steps: 8

# Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
# PyYAML resolve `5.0e-6` as a float but would read `5e-6` as a string.
learning_rate: 5.0e-6

# Written as a float (1.0), not an int; keep as-is unless the consumer is
# known to accept either.
num_train_epochs: 1.0

lr_scheduler_type: cosine

# Presumably the fraction of training steps spent on warmup — confirm.
warmup_ratio: 0.1