Update README.md
README.md
CHANGED
@@ -98,38 +98,56 @@ body {font-family: sans-serif; background-color: #080c14; color: #e1e9f0; line-h
 .data-arrow {color: #33ff99; width: 20px; display: inline-block;}
 .data-label {color: #00c3ff; width: 80px; display: inline-block;}
 
-.code-header {
-    background-color: rgba(51, 255, 153, 0.1);
-    padding: 8px 15px;
+/* Code config styling */
+.config-title {
+    color: #00c3ff;
+    font-size: 1.4rem;
+    text-transform: uppercase;
+    letter-spacing: 2px;
+    margin-bottom: 5px;
     font-family: 'Orbitron', sans-serif;
-    color: #33ff99;
-    font-size: 0.9rem;
-    letter-spacing: 1px;
-    border-bottom: 1px solid rgba(51, 255, 153, 0.2);
 }
+
+.config-underline {
+    width: 100%;
+    border-bottom: 1px dashed #00c3ff;
+    margin-bottom: 20px;
+}
+
+.config-section {
+    margin-bottom: 40px;
+}
+
+.config-subtitle {
     color: #e1e9f0;
+    font-size: 1.2rem;
+    margin: 25px 0 15px 0;
+    font-weight: normal;
 }
+
+.config-block {
+    position: relative;
+    background-color: #111927;
+    padding: 20px 20px 20px 25px;
+    border-radius: 4px;
+    overflow-x: auto;
 }
+
+.config-line {
+    position: absolute;
+    left: 0;
+    top: 0;
+    bottom: 0;
+    width: 4px;
+    background-color: #33ff99;
 }
+
+.config-code {
+    font-family: 'JetBrains Mono', monospace;
+    font-size: 0.9rem;
+    line-height: 1.7;
     color: #e1e9f0;
+    white-space: pre;
 }
 
 /* Subheading styling */
@@ -248,16 +266,22 @@ a:hover {text-decoration: underline;}
 <div class="section-content">
 <p>The model first went through SFT on a small synthetic dataset of 2.9 million tokens (approximately 750 conversations): primarily RP data, with small amounts of random instruct/assistant data and creative writing.</p>
 <p>The model then went through DPO training using approximately 1,100 chosen examples from the SFT dataset that were of exceptional quality or showed verifiable instruction following. Rejected samples were generated with another Llama 3.3 finetune known for poor instruction following.</p>
+<div class="config-section">
+<div class="config-title">AXOLOTL CONFIGURATIONS</div>
+<div class="config-underline"></div>
+<div class="config-subtitle">SFT Configuration:</div>
+<div class="config-block">
+<div class="config-line"></div>
+<pre class="config-code">base_model: zerofata/L3.3-GeneticLemonade-Unleashed-70B
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 special_tokens:
   pad_token: "<|finetune_right_pad_id|>"
 chat_template: llama3
 
+# ====================
+# DATASET CONFIGURATION
+# ====================
 datasets:
   - path: ./dataset.jsonl
     type: chat_template
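The hunk ends at the training dataset declaration; its remaining field mappings are unchanged and therefore not shown, but the matching `test_datasets` block in the next hunk (`field_messages: messages`, `role`/`content` mappings, `user`/`assistant`/`system` roles) indicates the layout. As an illustration only, one record of a dataset shaped like `./dataset.jsonl` would look roughly as follows when rendered as YAML (each JSONL line is one such object; the content below is invented, not taken from the actual dataset):

```yaml
# Hypothetical SFT record in the messages layout declared above.
messages:
  - role: system
    content: "You are Vess, a sardonic smuggler. Stay in character."
  - role: user
    content: "The dockmaster asks what we're hauling. What do you say?"
  - role: assistant
    content: "Vess pats the nearest crate without looking up. 'Machine parts. Mostly.'"
```

With `train_on_inputs: false` (set in the next hunk), only the assistant turns contribute to the training loss.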
@@ -272,44 +296,110 @@ datasets:
       assistant: ["assistant"]
       system: ["system"]
 
 test_datasets:
+  - path: ./validate_dataset.jsonl
+    type: chat_template
+    split: train
+    chat_template_strategy: tokenizer
+    field_messages: messages
+    message_property_mappings:
+      role: role
+      content: content
+    roles:
+      user: ["user"]
+      assistant: ["assistant"]
+      system: ["system"]
 
-train_on_inputs: false
-num_epochs: 2
-micro_batch_size: 4
-gradient_accumulation_steps: 2
-learning_rate: 1.5e-5
+dataset_prepared_path:
+train_on_inputs: false # Only train on assistant responses
 
+# ====================
+# QLORA CONFIGURATION
+# ====================
 adapter: qlora
 load_in_4bit: true
 lora_r: 64
 lora_alpha: 128
 lora_dropout: 0.1
 lora_target_linear: true
+# lora_modules_to_save: # Uncomment only if you added NEW tokens
 
+# ====================
+# TRAINING PARAMETERS
+# ====================
+num_epochs: 2
+micro_batch_size: 4
+gradient_accumulation_steps: 2
+learning_rate: 1.5e-5
+optimizer: paged_adamw_8bit
+lr_scheduler: rex
+warmup_ratio: 0.05
+weight_decay: 0.01
+max_grad_norm: 1.0
+
+# ====================
+# SEQUENCE & PACKING
+# ====================
 sequence_len: 8192
 sample_packing: true
+eval_sample_packing: false
 pad_to_sequence_len: true
 
+# ====================
+# HARDWARE OPTIMIZATIONS
+# ====================
 bf16: auto
 flash_attention: true
 gradient_checkpointing: true
+
+# ====================
+# EVALUATION & CHECKPOINTING
+# ====================
+evaluation_strategy: steps
+eval_steps: 5
+save_strategy: steps
+save_steps: 5
+save_total_limit: 5 # Keep best + last few checkpoints
+load_best_model_at_end: true
+metric_for_best_model: eval_loss
+greater_is_better: false
+early_stopping_patience: 5
+
+# ====================
+# LOGGING & OUTPUT
+# ====================
+output_dir: ./output_model
+logging_steps: 2
+save_safetensors: true
+
+# ====================
+# WANDB TRACKING
+# ====================
+wandb_project: project_name
+# wandb_entity: your_entity # Uncomment and set if needed
+# wandb_name: your_run_name # Uncomment and set if needed</pre>
+</div>
+<div class="config-subtitle">DPO Configuration:</div>
+<div class="config-block">
+<div class="config-line"></div>
+<pre class="config-code"># ====================
+# MODEL CONFIGURATION
+# ====================
+base_model: ApocalypseParty/unleashed-fulldata30
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+special_tokens: {}
 chat_template: tokenizer_default
 
+# ====================
+# RL/DPO CONFIGURATION
+# ====================
 rl: dpo
 rl_beta: 0.07
 
+# ====================
+# DATASET CONFIGURATION
+# ====================
 datasets:
   - path: ./dpo_cleaned-v3_deduplicated.jsonl
     type: chat_template.default
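The DPO dataset's field mappings continue past the hunk boundary (the unchanged lines are not shown), so the exact field names are not visible here. As a sketch only: DPO data of this kind pairs shared prompt turns with one preferred and one dispreferred completion, which matches the README's description (chosen samples curated from the SFT data, rejected samples generated by a weaker Llama 3.3 finetune). A hypothetical record in a common chosen/rejected shape:

```yaml
# Invented chosen/rejected pair; the actual field names used by
# ./dpo_cleaned-v3_deduplicated.jsonl are outside the shown hunks.
messages:   # shared prompt context
  - role: system
    content: "You are Vess, a sardonic smuggler. Stay in character."
  - role: user
    content: "Describe the cargo hold in two sentences."
chosen:     # high-quality, instruction-following completion
  role: assistant
  content: "Crates sit in uneven stacks, lashed down with fraying cord. The air smells of oil and cold metal."
rejected:   # completion that ignores the persona and the length constraint
  role: assistant
  content: "Sure! The cargo hold is where cargo is stored on a ship. It has crates, walls, and a floor. Let me know if you'd like more detail!"
```

`rl_beta: 0.07` scales the log-probability ratios in the DPO objective; a small value like this lets the policy drift further from the SFT reference for a given preference margin.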
@@ -323,33 +413,72 @@
       system: ["system"]
       user: ["user"]
       assistant: ["assistant"]
+dataset_prepared_path:
+train_on_inputs: false # Only train on assistant responses
 
-micro_batch_size: 4
-gradient_accumulation_steps: 2
-learning_rate: 2e-6
-
-# LoRA parameters
+# ====================
+# QLORA CONFIGURATION
+# ====================
 adapter: qlora
 load_in_4bit: true
 lora_r: 32
 lora_alpha: 64
 lora_dropout: 0.05
 lora_target_linear: true
+# lora_modules_to_save: # Uncomment only if you added NEW tokens
 
+# ====================
+# TRAINING PARAMETERS
+# ====================
+num_epochs: 1
+micro_batch_size: 4
+gradient_accumulation_steps: 2
+learning_rate: 2e-6
+optimizer: adamw_8bit
+lr_scheduler: cosine
+warmup_steps: 5
+weight_decay: 0.01
+max_grad_norm: 1.0
+
+# ====================
+# SEQUENCE CONFIGURATION
+# ====================
 sequence_len: 4096
 pad_to_sequence_len: true
 
+# ====================
+# HARDWARE OPTIMIZATIONS
+# ====================
 bf16: auto
+tf32: false
 flash_attention: true
 gradient_checkpointing: offload
 deepspeed: deepspeed_configs/zero1.json
+
+# ====================
+# CHECKPOINTING
+# ====================
+save_steps: 10
+save_total_limit: 10
+load_best_model_at_end: true
+metric_for_best_model: eval_loss
+greater_is_better: false
+
+# ====================
+# LOGGING & OUTPUT
+# ====================
+output_dir: ./dpo_model
+logging_steps: 2
+save_safetensors: true
+
+# ====================
+# WANDB TRACKING
+# ====================
+wandb_project: project_name
+# wandb_entity: your_entity # Uncomment and set if needed
+# wandb_name: your_run_name # Uncomment and set if needed</pre>
+</div>
 </div>
-<p>Full configurations are available in the repository for those interested in complete training details.</p>
 </div>
 </div>
 </div>
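Taken together, both runs train QLoRA adapters over a 4-bit base with an effective batch size of 8 sequences per device (micro_batch_size 4 × gradient_accumulation_steps 2), and the DPO run is additionally sharded via the referenced deepspeed_configs/zero1.json. Assuming a standard Axolotl install, configs like these are typically launched with `accelerate launch -m axolotl.cli.train config.yml` (newer Axolotl releases also provide `axolotl train config.yml`); the exact entry point varies by version and is not part of this commit.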