Update README.md
README.md
CHANGED
@@ -216,13 +216,19 @@ a:hover {text-decoration: underline;}
 <p>The model then went through DPO training using approximately 1100 chosen examples from the SFT dataset that were of exceptional quality or showed verifiable instruction following. Rejected samples were generated using another Llama 3.3 finetune that is known for poor instruction following.</p>
 <h3 class="subheading">SFT 1*H200</h3>
 <div class="data-box">
-<pre style="overflow-x: auto; color: #e1e9f0; margin: 0;">
+<pre style="overflow-x: auto; color: #e1e9f0; margin: 0;"># ====================
+# MODEL CONFIGURATION
+# ====================
+base_model: zerofata/L3.3-GeneticLemonade-Unleashed-70B
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 special_tokens:
   pad_token: "<|finetune_right_pad_id|>"
 chat_template: llama3
 
+# ====================
+# DATASET CONFIGURATION
+# ====================
 datasets:
   - path: ./dataset.jsonl
     type: chat_template
@@ -237,14 +243,37 @@ datasets:
       assistant: ["assistant"]
       system: ["system"]
 
-
+test_datasets:
+  - path: ./validate_dataset.jsonl
+    type: chat_template
+    split: train
+    chat_template_strategy: tokenizer
+    field_messages: messages
+    message_property_mappings:
+      role: role
+      content: content
+    roles:
+      user: ["user"]
+      assistant: ["assistant"]
+      system: ["system"]
+
+dataset_prepared_path:
+train_on_inputs: false # Only train on assistant responses
+
+# ====================
+# QLORA CONFIGURATION
+# ====================
 adapter: qlora
 load_in_4bit: true
 lora_r: 64
 lora_alpha: 128
 lora_dropout: 0.1
 lora_target_linear: true
+# lora_modules_to_save: # Uncomment only if you added NEW tokens
 
+# ====================
+# TRAINING PARAMETERS
+# ====================
 num_epochs: 2
 micro_batch_size: 4
 gradient_accumulation_steps: 2
@@ -255,25 +284,68 @@ warmup_ratio: 0.05
 weight_decay: 0.01
 max_grad_norm: 1.0
 
+# ====================
+# SEQUENCE & PACKING
+# ====================
 sequence_len: 8192
 sample_packing: true
+eval_sample_packing: false
 pad_to_sequence_len: true
 
+# ====================
+# HARDWARE OPTIMIZATIONS
+# ====================
 bf16: auto
 flash_attention: true
-gradient_checkpointing: true</pre>
+gradient_checkpointing: true
+
+# ====================
+# EVALUATION & CHECKPOINTING
+# ====================
+evaluation_strategy: steps
+eval_steps: 5
+save_strategy: steps
+save_steps: 5
+save_total_limit: 5 # Keep best + last few checkpoints
+load_best_model_at_end: true
+metric_for_best_model: eval_loss
+greater_is_better: false
+early_stopping_patience: 5
+
+# ====================
+# LOGGING & OUTPUT
+# ====================
+output_dir: ./output_model
+logging_steps: 2
+save_safetensors: true
+
+# ====================
+# WANDB TRACKING
+# ====================
+wandb_project: project_name
+# wandb_entity: your_entity # Uncomment and set if needed
+# wandb_name: your_run_name # Uncomment and set if needed</pre>
 </div>
 <h3 class="subheading">DPO 2*H200</h3>
 <div class="data-box">
-<pre style="overflow-x: auto; color: #e1e9f0; margin: 0;">
+<pre style="overflow-x: auto; color: #e1e9f0; margin: 0;"># ====================
+# MODEL CONFIGURATION
+# ====================
+base_model: ApocalypseParty/unleashed-fulldata30
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 special_tokens: {}
 chat_template: tokenizer_default
 
+# ====================
+# RL/DPO CONFIGURATION
+# ====================
 rl: dpo
 rl_beta: 0.07
 
+# ====================
+# DATASET CONFIGURATION
+# ====================
 datasets:
   - path: ./dpo_cleaned-v3_deduplicated.jsonl
     type: chat_template.default
@@ -287,15 +359,23 @@ datasets:
       system: ["system"]
       user: ["user"]
       assistant: ["assistant"]
+dataset_prepared_path:
+train_on_inputs: false # Only train on assistant responses
 
-
+# ====================
+# QLORA CONFIGURATION
+# ====================
 adapter: qlora
 load_in_4bit: true
 lora_r: 32
 lora_alpha: 64
 lora_dropout: 0.05
 lora_target_linear: true
+# lora_modules_to_save: # Uncomment only if you added NEW tokens
 
+# ====================
+# TRAINING PARAMETERS
+# ====================
 num_epochs: 1
 micro_batch_size: 4
 gradient_accumulation_steps: 2
@@ -306,17 +386,46 @@ warmup_steps: 5
 weight_decay: 0.01
 max_grad_norm: 1.0
 
+# ====================
+# SEQUENCE CONFIGURATION
+# ====================
 sequence_len: 4096
 pad_to_sequence_len: true
 
+# ====================
+# HARDWARE OPTIMIZATIONS
+# ====================
 bf16: auto
 tf32: false
 flash_attention: true
 gradient_checkpointing: offload
-deepspeed: deepspeed_configs/zero1.json</pre>
+deepspeed: deepspeed_configs/zero1.json
+
+# ====================
+# CHECKPOINTING
+# ====================
+save_steps: 10
+save_total_limit: 10
+load_best_model_at_end: true
+metric_for_best_model: eval_loss
+greater_is_better: false
+
+# ====================
+# LOGGING & OUTPUT
+# ====================
+output_dir: ./dpo_model
+logging_steps: 2
+save_safetensors: true
+
+# ====================
+# WANDB TRACKING
+# ====================
+wandb_project: project_name
+# wandb_entity: your_entity # Uncomment and set if needed
+# wandb_name: your_run_name # Uncomment and set if needed</pre>
 </div>
 </div>
 </div>
 </div>
 </body>
-</html>
+</html>
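The SFT hunks above define the data plumbing: with `type: chat_template`, `field_messages: messages`, and `message_property_mappings` of `role`/`content`, each line of `./dataset.jsonl` (and `./validate_dataset.jsonl`) is expected to be a JSON object holding one conversation. A minimal sketch of one record, pretty-printed for readability (in the actual JSONL file a record occupies a single line, and the conversation text is purely illustrative):

```json
{
  "messages": [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "List three uses for a hex key."},
    {"role": "assistant", "content": "Assembling flat-pack furniture, adjusting bicycle bolts, and servicing 3D printers."}
  ]
}
```

Because `train_on_inputs: false`, only the assistant turns contribute to the loss; the `llama3` chat template supplies the surrounding control tokens.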
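The DPO stage reads preference pairs from `./dpo_cleaned-v3_deduplicated.jsonl` via `type: chat_template.default`. Per the paragraph at the top of the diff, chosen completions come from the strongest SFT examples and rejected completions from a weaker Llama 3.3 finetune answering the same prompts. A sketch of one pair, assuming the common chosen/rejected message-list layout (the exact field names depend on Axolotl's DPO dataset schema, and the text is illustrative):

```json
{
  "chosen": [
    {"role": "user", "content": "Answer in exactly two sentences: why do ships float?"},
    {"role": "assistant", "content": "Ships float because they displace water whose weight equals their own. Their hulls enclose enough air to keep their average density below that of water."}
  ],
  "rejected": [
    {"role": "user", "content": "Answer in exactly two sentences: why do ships float?"},
    {"role": "assistant", "content": "Great question! Buoyancy is a fascinating subject with a long history, and to really appreciate it we should start with Archimedes..."}
  ]
}
```

The rejected side deliberately violates the instruction (here, the two-sentence limit), mirroring the "verifiable instruction following" criterion used to select the chosen examples.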
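For reference, `rl_beta: 0.07` is the β of the standard DPO objective, which scores each pair by how much the policy raises the log-likelihood of the chosen response over the rejected one relative to the frozen reference model:

```latex
\mathcal{L}_{\mathrm{DPO}}(\theta)
  = -\,\mathbb{E}_{(x,\,y_w,\,y_l)}\!\left[
      \log\sigma\!\left(
        \beta\log\frac{\pi_\theta(y_w\mid x)}{\pi_{\mathrm{ref}}(y_w\mid x)}
        -\beta\log\frac{\pi_\theta(y_l\mid x)}{\pi_{\mathrm{ref}}(y_l\mid x)}
      \right)
    \right],
  \qquad \beta = 0.07
```

A small β such as 0.07 exerts only a weak pull back toward the reference (SFT) policy, letting the preference signal move the model further; larger values keep it closer to the reference.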
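Finally, the effective batch sizes fall straight out of the configs and the 1*H200 / 2*H200 headings, using the usual formula micro_batch_size × gradient_accumulation_steps × GPU count:

```latex
\text{SFT:}\;\; 4 \times 2 \times 1 = 8 \ \text{packed sequences (up to 8192 tokens each) per optimizer step}
\qquad
\text{DPO:}\;\; 4 \times 2 \times 2 = 16 \ \text{preference pairs (up to 4096 tokens) per optimizer step}
```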