zerofata committed
Commit c62a737 · verified · 1 parent: 4c4b72b

Update README.md

Files changed (1): README.md (+186 -57)

README.md CHANGED
@@ -98,38 +98,56 @@ body {font-family: sans-serif; background-color: #080c14; color: #e1e9f0; line-h
 .data-arrow {color: #33ff99; width: 20px; display: inline-block;}
 .data-label {color: #00c3ff; width: 80px; display: inline-block;}
 
-/* Code display styling */
-.code-section {
-    margin: 15px 0;
-    border-left: 2px solid #33ff99;
-    background-color: rgba(0, 0, 0, 0.3);
-    overflow-x: auto;
-}
-.code-header {
-    background-color: rgba(51, 255, 153, 0.1);
-    padding: 8px 15px;
+/* Code config styling */
+.config-title {
+    color: #00c3ff;
+    font-size: 1.4rem;
+    text-transform: uppercase;
+    letter-spacing: 2px;
+    margin-bottom: 5px;
     font-family: 'Orbitron', sans-serif;
-    color: #33ff99;
-    font-size: 0.9rem;
-    letter-spacing: 1px;
-    border-bottom: 1px solid rgba(51, 255, 153, 0.2);
 }
-.code-content {
-    padding: 15px;
-    font-family: 'JetBrains Mono', monospace;
-    font-size: 0.85rem;
-    line-height: 1.4;
+
+.config-underline {
+    width: 100%;
+    border-bottom: 1px dashed #00c3ff;
+    margin-bottom: 20px;
+}
+
+.config-section {
+    margin-bottom: 40px;
+}
+
+.config-subtitle {
     color: #e1e9f0;
-    white-space: pre;
+    font-size: 1.2rem;
+    margin: 25px 0 15px 0;
+    font-weight: normal;
 }
-.code-comment {
-    color: #5f8baa;
+
+.config-block {
+    position: relative;
+    background-color: #111927;
+    padding: 20px 20px 20px 25px;
+    border-radius: 4px;
+    overflow-x: auto;
 }
-.code-key {
-    color: #00c3ff;
+
+.config-line {
+    position: absolute;
+    left: 0;
+    top: 0;
+    bottom: 0;
+    width: 4px;
+    background-color: #33ff99;
 }
-.code-value {
+
+.config-code {
+    font-family: 'JetBrains Mono', monospace;
+    font-size: 0.9rem;
+    line-height: 1.7;
     color: #e1e9f0;
+    white-space: pre;
 }
 
 /* Subheading styling */
@@ -248,16 +266,22 @@ a:hover {text-decoration: underline;}
 <div class="section-content">
 <p>The model first went through SFT with a small synthetic dataset of 2.9 million tokens, approximately 750 conversations. Primarily RP data with small amounts of random instruct / assistant data and creative writing.</p>
 <p>The model then went through DPO training using approx 1100 chosen examples from the SFT dataset that were of exceptional quality or showed verifiable instruction following. Rejected samples were generated using another Llama 3.3 finetune that is known for poor instruction following.</p>
-<h3 class="subheading">Axolotl Configurations</h3>
-<p>SFT Configuration:</p>
-<div style="background-color: #0a0e16; padding: 15px; border-radius: 4px; border-left: 2px solid #33ff99; margin-bottom: 20px; overflow-x: auto;">
-<code style="font-family: 'JetBrains Mono', monospace; font-size: 0.85rem; display: block; white-space: pre;">base_model: zerofata/L3.3-GeneticLemonade-Unleashed-70B
+<div class="config-section">
+<div class="config-title">AXOLOTL CONFIGURATIONS</div>
+<div class="config-underline"></div>
+<div class="config-subtitle">SFT Configuration:</div>
+<div class="config-block">
+<div class="config-line"></div>
+<pre class="config-code">base_model: zerofata/L3.3-GeneticLemonade-Unleashed-70B
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 special_tokens:
   pad_token: "<|finetune_right_pad_id|>"
 chat_template: llama3
 
+# ====================
+# DATASET CONFIGURATION
+# ====================
 datasets:
   - path: ./dataset.jsonl
     type: chat_template
@@ -272,44 +296,110 @@ datasets:
       assistant: ["assistant"]
       system: ["system"]
 
-test_datasets: [...]
+test_datasets:
+  - path: ./validate_dataset.jsonl
+    type: chat_template
+    split: train
+    chat_template_strategy: tokenizer
+    field_messages: messages
+    message_property_mappings:
+      role: role
+      content: content
+    roles:
+      user: ["user"]
+      assistant: ["assistant"]
+      system: ["system"]
 
-# Training configuration
-train_on_inputs: false
-num_epochs: 2
-micro_batch_size: 4
-gradient_accumulation_steps: 2
-learning_rate: 1.5e-5
+dataset_prepared_path:
+train_on_inputs: false # Only train on assistant responses
 
-# LoRA parameters
+# ====================
+# QLORA CONFIGURATION
+# ====================
 adapter: qlora
 load_in_4bit: true
 lora_r: 64
 lora_alpha: 128
 lora_dropout: 0.1
 lora_target_linear: true
+# lora_modules_to_save: # Uncomment only if you added NEW tokens
 
-# Sequence handling
+# ====================
+# TRAINING PARAMETERS
+# ====================
+num_epochs: 2
+micro_batch_size: 4
+gradient_accumulation_steps: 2
+learning_rate: 1.5e-5
+optimizer: paged_adamw_8bit
+lr_scheduler: rex
+warmup_ratio: 0.05
+weight_decay: 0.01
+max_grad_norm: 1.0
+
+# ====================
+# SEQUENCE & PACKING
+# ====================
 sequence_len: 8192
 sample_packing: true
+eval_sample_packing: false
 pad_to_sequence_len: true
 
-# Hardware optimizations
+# ====================
+# HARDWARE OPTIMIZATIONS
+# ====================
 bf16: auto
 flash_attention: true
-gradient_checkpointing: true</code>
-</div>
-<p>DPO Configuration:</p>
-<div style="background-color: #0a0e16; padding: 15px; border-radius: 4px; border-left: 2px solid #33ff99; margin-bottom: 20px; overflow-x: auto;">
-<code style="font-family: 'JetBrains Mono', monospace; font-size: 0.85rem; display: block; white-space: pre;">base_model: ApocalypseParty/unleashed-fulldata30
+gradient_checkpointing: true
+
+# ====================
+# EVALUATION & CHECKPOINTING
+# ====================
+evaluation_strategy: steps
+eval_steps: 5
+save_strategy: steps
+save_steps: 5
+save_total_limit: 5 # Keep best + last few checkpoints
+load_best_model_at_end: true
+metric_for_best_model: eval_loss
+greater_is_better: false
+early_stopping_patience: 5
+
+# ====================
+# LOGGING & OUTPUT
+# ====================
+output_dir: ./output_model
+logging_steps: 2
+save_safetensors: true
+
+# ====================
+# WANDB TRACKING
+# ====================
+wandb_project: project_name
+# wandb_entity: your_entity # Uncomment and set if needed
+# wandb_name: your_run_name # Uncomment and set if needed</pre>
+</div>
+<div class="config-subtitle">DPO Configuration:</div>
+<div class="config-block">
+<div class="config-line"></div>
+<pre class="config-code"># ====================
+# MODEL CONFIGURATION
+# ====================
+base_model: ApocalypseParty/unleashed-fulldata30
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+special_tokens: {}
 chat_template: tokenizer_default
 
+# ====================
+# RL/DPO CONFIGURATION
+# ====================
 rl: dpo
 rl_beta: 0.07
 
+# ====================
+# DATASET CONFIGURATION
+# ====================
 datasets:
   - path: ./dpo_cleaned-v3_deduplicated.jsonl
    type: chat_template.default
@@ -323,33 +413,72 @@ datasets:
       system: ["system"]
       user: ["user"]
       assistant: ["assistant"]
+dataset_prepared_path:
+train_on_inputs: false # Only train on assistant responses
 
-# Training configuration
-train_on_inputs: false
-num_epochs: 1
-micro_batch_size: 4
-gradient_accumulation_steps: 2
-learning_rate: 2e-6
-
-# LoRA parameters
+# ====================
+# QLORA CONFIGURATION
+# ====================
 adapter: qlora
 load_in_4bit: true
 lora_r: 32
 lora_alpha: 64
 lora_dropout: 0.05
 lora_target_linear: true
+# lora_modules_to_save: # Uncomment only if you added NEW tokens
 
-# Sequence handling
+# ====================
+# TRAINING PARAMETERS
+# ====================
+num_epochs: 1
+micro_batch_size: 4
+gradient_accumulation_steps: 2
+learning_rate: 2e-6
+optimizer: adamw_8bit
+lr_scheduler: cosine
+warmup_steps: 5
+weight_decay: 0.01
+max_grad_norm: 1.0
+
+# ====================
+# SEQUENCE CONFIGURATION
+# ====================
 sequence_len: 4096
 pad_to_sequence_len: true
 
-# Hardware optimizations
+# ====================
+# HARDWARE OPTIMIZATIONS
+# ====================
 bf16: auto
+tf32: false
 flash_attention: true
 gradient_checkpointing: offload
-deepspeed: deepspeed_configs/zero1.json</code>
+deepspeed: deepspeed_configs/zero1.json
+
+# ====================
+# CHECKPOINTING
+# ====================
+save_steps: 10
+save_total_limit: 10
+load_best_model_at_end: true
+metric_for_best_model: eval_loss
+greater_is_better: false
+
+# ====================
+# LOGGING & OUTPUT
+# ====================
+output_dir: ./dpo_model
+logging_steps: 2
+save_safetensors: true
+
+# ====================
+# WANDB TRACKING
+# ====================
+wandb_project: project_name
+# wandb_entity: your_entity # Uncomment and set if needed
+# wandb_name: your_run_name # Uncomment and set if needed</pre>
+</div>
 </div>
-<p>Full configurations are available in the repository for those interested in complete training details.</p>
 </div>
 </div>
 </div>
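
The diff never shows what a record in ./dataset.jsonl or ./validate_dataset.jsonl looks like. The mappings the SFT config declares (field_messages: messages, message_property_mappings of role/content, and the user / assistant / system roles) imply a one-conversation-per-line JSONL shape roughly like the sketch below; the placeholder text and the writer script are illustrative assumptions, not material from the repository.

import json

# Hypothetical record in the shape implied by `field_messages: messages` and
# `message_property_mappings: {role: role, content: content}` in the SFT config.
# The contents are placeholders, not actual training data.
record = {
    "messages": [
        {"role": "system", "content": "System prompt / scenario setup goes here."},
        {"role": "user", "content": "User turn goes here."},
        {"role": "assistant", "content": "Assistant reply the model is trained to produce."},
    ]
}

# One JSON object per line, matching the path the config points at.
with open("dataset.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(record, ensure_ascii=False) + "\n")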
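
The DPO dataset lines that would pin down the chosen/rejected field names fall outside the hunk context shown above, so the exact schema of dpo_cleaned-v3_deduplicated.jsonl is not visible in this commit. Purely as an illustration, a commonly used preference-pair shape for chat-style DPO data pairs the shared context turns with one chosen and one rejected assistant reply, which matches the description of chosen examples kept from the SFT set and rejected samples generated by another finetune.

import json

# Illustrative preference pair; the field names are an assumption, since the
# relevant mapping lines are not part of the visible diff context.
pair = {
    "messages": [
        {"role": "system", "content": "System prompt goes here."},
        {"role": "user", "content": "User turn goes here."},
    ],
    "chosen": {"role": "assistant", "content": "High-quality reply kept from the SFT data."},
    "rejected": {"role": "assistant", "content": "Weaker reply generated by the other Llama 3.3 finetune."},
}

with open("dpo_pairs.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(pair, ensure_ascii=False) + "\n")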
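
For readers who want to reproduce the setup: Axolotl configs like these are normally launched through accelerate with the YAML file as the only argument. The sketch below assumes the axolotl and accelerate packages are installed and that the SFT block above was saved as sft_config.yml (a hypothetical filename); the DPO config is launched the same way once its base model is available. With micro_batch_size: 4 and gradient_accumulation_steps: 2, each optimizer step sees an effective batch of 8 sequences per GPU before any data parallelism.

import subprocess

# Minimal launch sketch, not taken from the repository: run one training
# stage by pointing Axolotl's trainer module at the saved config file.
subprocess.run(
    ["accelerate", "launch", "-m", "axolotl.cli.train", "sft_config.yml"],
    check=True,
)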