Update README.md
README.md
CHANGED
@@ -216,13 +216,19 @@ a:hover {text-decoration: underline;}
 <p>The model then went through DPO training using approximately 1100 chosen examples from the SFT dataset that were of exceptional quality or showed verifiable instruction following. Rejected samples were generated using another Llama 3.3 finetune that is known for poor instruction following.</p>
 <h3 class="subheading">SFT 1*H200</h3>
 <div class="data-box">
-<pre style="overflow-x: auto; color: #e1e9f0; margin: 0;">
+<pre style="overflow-x: auto; color: #e1e9f0; margin: 0;"># ====================
+# MODEL CONFIGURATION
+# ====================
+base_model: zerofata/L3.3-GeneticLemonade-Unleashed-70B
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 special_tokens:
   pad_token: "<|finetune_right_pad_id|>"
 chat_template: llama3
 
+# ====================
+# DATASET CONFIGURATION
+# ====================
 datasets:
   - path: ./dataset.jsonl
     type: chat_template
@@ -237,14 +243,37 @@ datasets:
       assistant: ["assistant"]
       system: ["system"]
 
-
+test_datasets:
+  - path: ./validate_dataset.jsonl
+    type: chat_template
+    split: train
+    chat_template_strategy: tokenizer
+    field_messages: messages
+    message_property_mappings:
+      role: role
+      content: content
+    roles:
+      user: ["user"]
+      assistant: ["assistant"]
+      system: ["system"]
+
+dataset_prepared_path:
+train_on_inputs: false # Only train on assistant responses
+
+# ====================
+# QLORA CONFIGURATION
+# ====================
 adapter: qlora
 load_in_4bit: true
 lora_r: 64
 lora_alpha: 128
 lora_dropout: 0.1
 lora_target_linear: true
+# lora_modules_to_save: # Uncomment only if you added NEW tokens
 
+# ====================
+# TRAINING PARAMETERS
+# ====================
 num_epochs: 2
 micro_batch_size: 4
 gradient_accumulation_steps: 2
@@ -255,25 +284,68 @@ warmup_ratio: 0.05
 weight_decay: 0.01
 max_grad_norm: 1.0
 
+# ====================
+# SEQUENCE & PACKING
+# ====================
 sequence_len: 8192
 sample_packing: true
+eval_sample_packing: false
 pad_to_sequence_len: true
 
+# ====================
+# HARDWARE OPTIMIZATIONS
+# ====================
 bf16: auto
 flash_attention: true
-gradient_checkpointing: true</pre>
+gradient_checkpointing: true
+
+# ====================
+# EVALUATION & CHECKPOINTING
+# ====================
+evaluation_strategy: steps
+eval_steps: 5
+save_strategy: steps
+save_steps: 5
+save_total_limit: 5 # Keep best + last few checkpoints
+load_best_model_at_end: true
+metric_for_best_model: eval_loss
+greater_is_better: false
+early_stopping_patience: 5
+
+# ====================
+# LOGGING & OUTPUT
+# ====================
+output_dir: ./output_model
+logging_steps: 2
+save_safetensors: true
+
+# ====================
+# WANDB TRACKING
+# ====================
+wandb_project: project_name
+# wandb_entity: your_entity # Uncomment and set if needed
+# wandb_name: your_run_name # Uncomment and set if needed</pre>
 </div>
 <h3 class="subheading">DPO 2*H200</h3>
 <div class="data-box">
-<pre style="overflow-x: auto; color: #e1e9f0; margin: 0;">
+<pre style="overflow-x: auto; color: #e1e9f0; margin: 0;"># ====================
+# MODEL CONFIGURATION
+# ====================
+base_model: ApocalypseParty/unleashed-fulldata30
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 special_tokens: {}
 chat_template: tokenizer_default
 
+# ====================
+# RL/DPO CONFIGURATION
+# ====================
 rl: dpo
 rl_beta: 0.07
 
+# ====================
+# DATASET CONFIGURATION
+# ====================
 datasets:
   - path: ./dpo_cleaned-v3_deduplicated.jsonl
     type: chat_template.default
@@ -287,15 +359,23 @@ datasets:
       system: ["system"]
       user: ["user"]
       assistant: ["assistant"]
+dataset_prepared_path:
+train_on_inputs: false # Only train on assistant responses
 
-
+# ====================
+# QLORA CONFIGURATION
+# ====================
 adapter: qlora
 load_in_4bit: true
 lora_r: 32
 lora_alpha: 64
 lora_dropout: 0.05
 lora_target_linear: true
+# lora_modules_to_save: # Uncomment only if you added NEW tokens
 
+# ====================
+# TRAINING PARAMETERS
+# ====================
 num_epochs: 1
 micro_batch_size: 4
 gradient_accumulation_steps: 2
@@ -306,17 +386,46 @@ warmup_steps: 5
 weight_decay: 0.01
 max_grad_norm: 1.0
 
+# ====================
+# SEQUENCE CONFIGURATION
+# ====================
 sequence_len: 4096
 pad_to_sequence_len: true
 
+# ====================
+# HARDWARE OPTIMIZATIONS
+# ====================
 bf16: auto
 tf32: false
 flash_attention: true
 gradient_checkpointing: offload
-deepspeed: deepspeed_configs/zero1.json</pre>
+deepspeed: deepspeed_configs/zero1.json
+
+# ====================
+# CHECKPOINTING
+# ====================
+save_steps: 10
+save_total_limit: 10
+load_best_model_at_end: true
+metric_for_best_model: eval_loss
+greater_is_better: false
+
+# ====================
+# LOGGING & OUTPUT
+# ====================
+output_dir: ./dpo_model
+logging_steps: 2
+save_safetensors: true
+
+# ====================
+# WANDB TRACKING
+# ====================
+wandb_project: project_name
+# wandb_entity: your_entity # Uncomment and set if needed
+# wandb_name: your_run_name # Uncomment and set if needed</pre>
 </div>
 </div>
 </div>
 </div>
 </body>
-</html>
+</html>
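The SFT hunks above define the data plumbing: with `type: chat_template`, `field_messages: messages`, and `message_property_mappings` of `role`/`content`, each line of `./dataset.jsonl` (and `./validate_dataset.jsonl`) is expected to be a JSON object holding one conversation. A minimal sketch of one record, pretty-printed for readability (in the actual JSONL file a record occupies a single line, and the conversation text is purely illustrative):

```json
{
  "messages": [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "List three uses for a hex key."},
    {"role": "assistant", "content": "Assembling flat-pack furniture, adjusting bicycle bolts, and servicing 3D printers."}
  ]
}
```

Because `train_on_inputs: false`, only the assistant turns contribute to the loss; the `llama3` chat template supplies the surrounding control tokens.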
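The DPO stage reads preference pairs from `./dpo_cleaned-v3_deduplicated.jsonl` via `type: chat_template.default`. Per the paragraph at the top of the diff, chosen completions come from the strongest SFT examples and rejected completions from a weaker Llama 3.3 finetune answering the same prompts. A sketch of one pair, assuming the common chosen/rejected message-list layout (the exact field names depend on Axolotl's DPO dataset schema, and the text is illustrative):

```json
{
  "chosen": [
    {"role": "user", "content": "Answer in exactly two sentences: why do ships float?"},
    {"role": "assistant", "content": "Ships float because they displace water whose weight equals their own. Their hulls enclose enough air to keep their average density below that of water."}
  ],
  "rejected": [
    {"role": "user", "content": "Answer in exactly two sentences: why do ships float?"},
    {"role": "assistant", "content": "Great question! Buoyancy is a fascinating subject with a long history, and to really appreciate it we should start with Archimedes..."}
  ]
}
```

The rejected side deliberately violates the instruction (here, the two-sentence limit), mirroring the "verifiable instruction following" criterion used to select the chosen examples.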
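For reference, `rl_beta: 0.07` is the β of the standard DPO objective, which scores each pair by how much the policy raises the log-likelihood of the chosen response over the rejected one relative to the frozen reference model:

```latex
\mathcal{L}_{\mathrm{DPO}}(\theta)
  = -\,\mathbb{E}_{(x,\,y_w,\,y_l)}\!\left[
      \log\sigma\!\left(
        \beta\log\frac{\pi_\theta(y_w\mid x)}{\pi_{\mathrm{ref}}(y_w\mid x)}
        -\beta\log\frac{\pi_\theta(y_l\mid x)}{\pi_{\mathrm{ref}}(y_l\mid x)}
      \right)
    \right],
  \qquad \beta = 0.07
```

A small β such as 0.07 exerts only a weak pull back toward the reference (SFT) policy, letting the preference signal move the model further; larger values keep it closer to the reference.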
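Finally, the effective batch sizes fall straight out of the configs and the 1*H200 / 2*H200 headings, using the usual formula micro_batch_size × gradient_accumulation_steps × GPU count:

```latex
\text{SFT:}\;\; 4 \times 2 \times 1 = 8 \ \text{packed sequences (up to 8192 tokens each) per optimizer step}
\qquad
\text{DPO:}\;\; 4 \times 2 \times 2 = 16 \ \text{preference pairs (up to 4096 tokens) per optimizer step}
```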