zerofata committed on
Commit
ddde70b
·
verified ·
1 Parent(s): c0e74d3

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +116 -7
README.md CHANGED
@@ -216,13 +216,19 @@ a:hover {text-decoration: underline;}
216
  <p>The model then went through DPO training using approx 1100 chosen examples from the SFT dataset that were of exceptional quality or showed verifiable instruction following. Rejected samples were generated using another Llama 3.3 finetune that is known for poor instruction following.</p>
217
  <h3 class="subheading">SFT 1*H200</h3>
218
  <div class="data-box">
219
- <pre style="overflow-x: auto; color: #e1e9f0; margin: 0;">base_model: zerofata/L3.3-GeneticLemonade-Unleashed-70B
 
 
 
220
  model_type: AutoModelForCausalLM
221
  tokenizer_type: AutoTokenizer
222
  special_tokens:
223
  pad_token: "<|finetune_right_pad_id|>"
224
  chat_template: llama3
225
 
 
 
 
226
  datasets:
227
  - path: ./dataset.jsonl
228
  type: chat_template
@@ -237,14 +243,37 @@ datasets:
237
  assistant: ["assistant"]
238
  system: ["system"]
239
 
240
- train_on_inputs: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  adapter: qlora
242
  load_in_4bit: true
243
  lora_r: 64
244
  lora_alpha: 128
245
  lora_dropout: 0.1
246
  lora_target_linear: true
 
247
 
 
 
 
248
  num_epochs: 2
249
  micro_batch_size: 4
250
  gradient_accumulation_steps: 2
@@ -255,25 +284,68 @@ warmup_ratio: 0.05
255
  weight_decay: 0.01
256
  max_grad_norm: 1.0
257
 
 
 
 
258
  sequence_len: 8192
259
  sample_packing: true
 
260
  pad_to_sequence_len: true
261
 
 
 
 
262
  bf16: auto
263
  flash_attention: true
264
- gradient_checkpointing: true</pre>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  </div>
266
  <h3 class="subheading">DPO 2*H200</h3>
267
  <div class="data-box">
268
- <pre style="overflow-x: auto; color: #e1e9f0; margin: 0;">base_model: ApocalypseParty/unleashed-fulldata30
 
 
 
269
  model_type: AutoModelForCausalLM
270
  tokenizer_type: AutoTokenizer
271
  special_tokens: {}
272
  chat_template: tokenizer_default
273
 
 
 
 
274
  rl: dpo
275
  rl_beta: 0.07
276
 
 
 
 
277
  datasets:
278
  - path: ./dpo_cleaned-v3_deduplicated.jsonl
279
  type: chat_template.default
@@ -287,15 +359,23 @@ datasets:
287
  system: ["system"]
288
  user: ["user"]
289
  assistant: ["assistant"]
 
 
290
 
291
- train_on_inputs: false
 
 
292
  adapter: qlora
293
  load_in_4bit: true
294
  lora_r: 32
295
  lora_alpha: 64
296
  lora_dropout: 0.05
297
  lora_target_linear: true
 
298
 
 
 
 
299
  num_epochs: 1
300
  micro_batch_size: 4
301
  gradient_accumulation_steps: 2
@@ -306,17 +386,46 @@ warmup_steps: 5
306
  weight_decay: 0.01
307
  max_grad_norm: 1.0
308
 
 
 
 
309
  sequence_len: 4096
310
  pad_to_sequence_len: true
311
 
 
 
 
312
  bf16: auto
313
  tf32: false
314
  flash_attention: true
315
  gradient_checkpointing: offload
316
- deepspeed: deepspeed_configs/zero1.json</pre>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  </div>
318
  </div>
319
  </div>
320
  </div>
321
  </body>
322
- </html>
 
216
  <p>The model then went through DPO training using approx 1100 chosen examples from the SFT dataset that were of exceptional quality or showed verifiable instruction following. Rejected samples were generated using another Llama 3.3 finetune that is known for poor instruction following.</p>
217
  <h3 class="subheading">SFT 1*H200</h3>
218
  <div class="data-box">
219
+ <pre style="overflow-x: auto; color: #e1e9f0; margin: 0;"># ====================
220
+ # MODEL CONFIGURATION
221
+ # ====================
222
+ base_model: zerofata/L3.3-GeneticLemonade-Unleashed-70B
223
  model_type: AutoModelForCausalLM
224
  tokenizer_type: AutoTokenizer
225
  special_tokens:
226
  pad_token: "<|finetune_right_pad_id|>"
227
  chat_template: llama3
228
 
229
+ # ====================
230
+ # DATASET CONFIGURATION
231
+ # ====================
232
  datasets:
233
  - path: ./dataset.jsonl
234
  type: chat_template
 
243
  assistant: ["assistant"]
244
  system: ["system"]
245
 
246
+ test_datasets:
247
+ - path: ./validate_dataset.jsonl
248
+ type: chat_template
249
+ split: train
250
+ chat_template_strategy: tokenizer
251
+ field_messages: messages
252
+ message_property_mappings:
253
+ role: role
254
+ content: content
255
+ roles:
256
+ user: ["user"]
257
+ assistant: ["assistant"]
258
+ system: ["system"]
259
+
260
+ dataset_prepared_path:
261
+ train_on_inputs: false # Only train on assistant responses
262
+
263
+ # ====================
264
+ # QLORA CONFIGURATION
265
+ # ====================
266
  adapter: qlora
267
  load_in_4bit: true
268
  lora_r: 64
269
  lora_alpha: 128
270
  lora_dropout: 0.1
271
  lora_target_linear: true
272
+ # lora_modules_to_save: # Uncomment only if you added NEW tokens
273
 
274
+ # ====================
275
+ # TRAINING PARAMETERS
276
+ # ====================
277
  num_epochs: 2
278
  micro_batch_size: 4
279
  gradient_accumulation_steps: 2
 
284
  weight_decay: 0.01
285
  max_grad_norm: 1.0
286
 
287
+ # ====================
288
+ # SEQUENCE & PACKING
289
+ # ====================
290
  sequence_len: 8192
291
  sample_packing: true
292
+ eval_sample_packing: false
293
  pad_to_sequence_len: true
294
 
295
+ # ====================
296
+ # HARDWARE OPTIMIZATIONS
297
+ # ====================
298
  bf16: auto
299
  flash_attention: true
300
+ gradient_checkpointing: true
301
+
302
+ # ====================
303
+ # EVALUATION & CHECKPOINTING
304
+ # ====================
305
+ evaluation_strategy: steps
306
+ eval_steps: 5
307
+ save_strategy: steps
308
+ save_steps: 5
309
+ save_total_limit: 5 # Keep best + last few checkpoints
310
+ load_best_model_at_end: true
311
+ metric_for_best_model: eval_loss
312
+ greater_is_better: false
313
+ early_stopping_patience: 5
314
+
315
+ # ====================
316
+ # LOGGING & OUTPUT
317
+ # ====================
318
+ output_dir: ./output_model
319
+ logging_steps: 2
320
+ save_safetensors: true
321
+
322
+ # ====================
323
+ # WANDB TRACKING
324
+ # ====================
325
+ wandb_project: project_name
326
+ # wandb_entity: your_entity # Uncomment and set if needed
327
+ # wandb_name: your_run_name # Uncomment and set if needed</pre>
328
  </div>
329
  <h3 class="subheading">DPO 2*H200</h3>
330
  <div class="data-box">
331
+ <pre style="overflow-x: auto; color: #e1e9f0; margin: 0;"># ====================
332
+ # MODEL CONFIGURATION
333
+ # ====================
334
+ base_model: ApocalypseParty/unleashed-fulldata30
335
  model_type: AutoModelForCausalLM
336
  tokenizer_type: AutoTokenizer
337
  special_tokens: {}
338
  chat_template: tokenizer_default
339
 
340
+ # ====================
341
+ # RL/DPO CONFIGURATION
342
+ # ====================
343
  rl: dpo
344
  rl_beta: 0.07
345
 
346
+ # ====================
347
+ # DATASET CONFIGURATION
348
+ # ====================
349
  datasets:
350
  - path: ./dpo_cleaned-v3_deduplicated.jsonl
351
  type: chat_template.default
 
359
  system: ["system"]
360
  user: ["user"]
361
  assistant: ["assistant"]
362
+ dataset_prepared_path:
363
+ train_on_inputs: false # Only train on assistant responses
364
 
365
+ # ====================
366
+ # QLORA CONFIGURATION
367
+ # ====================
368
  adapter: qlora
369
  load_in_4bit: true
370
  lora_r: 32
371
  lora_alpha: 64
372
  lora_dropout: 0.05
373
  lora_target_linear: true
374
+ # lora_modules_to_save: # Uncomment only if you added NEW tokens
375
 
376
+ # ====================
377
+ # TRAINING PARAMETERS
378
+ # ====================
379
  num_epochs: 1
380
  micro_batch_size: 4
381
  gradient_accumulation_steps: 2
 
386
  weight_decay: 0.01
387
  max_grad_norm: 1.0
388
 
389
+ # ====================
390
+ # SEQUENCE CONFIGURATION
391
+ # ====================
392
  sequence_len: 4096
393
  pad_to_sequence_len: true
394
 
395
+ # ====================
396
+ # HARDWARE OPTIMIZATIONS
397
+ # ====================
398
  bf16: auto
399
  tf32: false
400
  flash_attention: true
401
  gradient_checkpointing: offload
402
+ deepspeed: deepspeed_configs/zero1.json
403
+
404
+ # ====================
405
+ # CHECKPOINTING
406
+ # ====================
407
+ save_steps: 10
408
+ save_total_limit: 10
409
+ load_best_model_at_end: true
410
+ metric_for_best_model: eval_loss
411
+ greater_is_better: false
412
+
413
+ # ====================
414
+ # LOGGING & OUTPUT
415
+ # ====================
416
+ output_dir: ./dpo_model
417
+ logging_steps: 2
418
+ save_safetensors: true
419
+
420
+ # ====================
421
+ # WANDB TRACKING
422
+ # ====================
423
+ wandb_project: project_name
424
+ # wandb_entity: your_entity # Uncomment and set if needed
425
+ # wandb_name: your_run_name # Uncomment and set if needed</pre>
426
  </div>
427
  </div>
428
  </div>
429
  </div>
430
  </body>
431
+ </html>