Update README.md
Browse files
README.md
CHANGED
@@ -257,7 +257,32 @@ Or you can try out Gemma-T4 (Thanks to Sleepdeprived) : https://huggingface.co/s
 <details>
 <summary>SFT Trainer Config</summary>
 <pre><code>
-
+trainer = SFTTrainer(
+    model=model,
+    tokenizer=tokenizer,
+    train_dataset=dataset,
+    eval_dataset=None,
+    args=SFTConfig(
+        dataset_text_field="text",
+        per_device_train_batch_size=1,
+        gradient_accumulation_steps=4,
+        warmup_steps=50,
+        num_train_epochs=1,
+        learning_rate=1e-4,
+        max_grad_norm=0.2,
+        logging_steps=1,
+        optim="paged_adamw_8bit",
+        weight_decay=0.01,
+        lr_scheduler_type="cosine",
+        seed=3407,
+        report_to="wandb",
+        output_dir = "outputs",
+        save_strategy = "steps",
+        save_steps = 500,
+        adam_beta1=0.92,
+        adam_beta2=0.999,
+    ),
+)
 </code></pre>
 </details>
 </div>