Delta-Vector committed on
Commit
642b324
·
verified ·
1 Parent(s): 99b5b81

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +26 -1
README.md CHANGED
@@ -257,7 +257,32 @@ Or you can try out Gemma-T4 (Thanks to Sleepdeprived) : https://huggingface.co/s
257
  <details>
258
  <summary>SFT Trainer Config</summary>
259
  <pre><code>
260
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  </code></pre>
262
  </details>
263
  </div>
 
257
  <details>
258
  <summary>SFT Trainer Config</summary>
259
  <pre><code>
260
+ trainer = SFTTrainer(
261
+ model=model,
262
+ tokenizer=tokenizer,
263
+ train_dataset=dataset,
264
+ eval_dataset=None,
265
+ args=SFTConfig(
266
+ dataset_text_field="text",
267
+ per_device_train_batch_size=1,
268
+ gradient_accumulation_steps=4,
269
+ warmup_steps=50,
270
+ num_train_epochs=1,
271
+ learning_rate=1e-4,
272
+ max_grad_norm=0.2,
273
+ logging_steps=1,
274
+ optim="paged_adamw_8bit",
275
+ weight_decay=0.01,
276
+ lr_scheduler_type="cosine",
277
+ seed=3407,
278
+ report_to="wandb",
279
+ output_dir = "outputs",
280
+ save_strategy = "steps",
281
+ save_steps = 500,
282
+ adam_beta1=0.92,
283
+ adam_beta2=0.999,
284
+ ),
285
+ )
286
  </code></pre>
287
  </details>
288
  </div>