Update README.md
README.md
CHANGED
@@ -98,38 +98,56 @@ body {font-family: sans-serif; background-color: #080c14; color: #e1e9f0; line-h
 .data-arrow {color: #33ff99; width: 20px; display: inline-block;}
 .data-label {color: #00c3ff; width: 80px; display: inline-block;}
 
-.code-header {
-    background-color: rgba(51, 255, 153, 0.1);
-    padding: 8px 15px;
+/* Code config styling */
+.config-title {
+    color: #00c3ff;
+    font-size: 1.4rem;
+    text-transform: uppercase;
+    letter-spacing: 2px;
+    margin-bottom: 5px;
     font-family: 'Orbitron', sans-serif;
-    color: #33ff99;
-    font-size: 0.9rem;
-    letter-spacing: 1px;
-    border-bottom: 1px solid rgba(51, 255, 153, 0.2);
 }
+
+.config-underline {
+    width: 100%;
+    border-bottom: 1px dashed #00c3ff;
+    margin-bottom: 20px;
+}
+
+.config-section {
+    margin-bottom: 40px;
+}
+
+.config-subtitle {
     color: #e1e9f0;
+    font-size: 1.2rem;
+    margin: 25px 0 15px 0;
+    font-weight: normal;
 }
+
+.config-block {
+    position: relative;
+    background-color: #111927;
+    padding: 20px 20px 20px 25px;
+    border-radius: 4px;
+    overflow-x: auto;
 }
+
+.config-line {
+    position: absolute;
+    left: 0;
+    top: 0;
+    bottom: 0;
+    width: 4px;
+    background-color: #33ff99;
 }
+
+.config-code {
+    font-family: 'JetBrains Mono', monospace;
+    font-size: 0.9rem;
+    line-height: 1.7;
     color: #e1e9f0;
+    white-space: pre;
 }
 
 /* Subheading styling */
@@ -248,16 +266,22 @@ a:hover {text-decoration: underline;}
 <div class="section-content">
 <p>The model first went through SFT on a small synthetic dataset of 2.9 million tokens (approximately 750 conversations): primarily RP data, with small amounts of random instruct/assistant data and creative writing.</p>
 <p>The model then went through DPO training using approximately 1,100 chosen examples from the SFT dataset that were of exceptional quality or showed verifiable instruction following. Rejected samples were generated with another Llama 3.3 finetune known for poor instruction following.</p>
+<div class="config-section">
+<div class="config-title">AXOLOTL CONFIGURATIONS</div>
+<div class="config-underline"></div>
+<div class="config-subtitle">SFT Configuration:</div>
+<div class="config-block">
+<div class="config-line"></div>
+<pre class="config-code">base_model: zerofata/L3.3-GeneticLemonade-Unleashed-70B
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 special_tokens:
   pad_token: "<|finetune_right_pad_id|>"
 chat_template: llama3
 
+# ====================
+# DATASET CONFIGURATION
+# ====================
 datasets:
   - path: ./dataset.jsonl
     type: chat_template
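The hunk ends at the training dataset declaration; its remaining field mappings are unchanged and therefore not shown, but the matching `test_datasets` block in the next hunk (`field_messages: messages`, `role`/`content` mappings, `user`/`assistant`/`system` roles) indicates the layout. As an illustration only, one record of a dataset shaped like `./dataset.jsonl` would look roughly as follows when rendered as YAML (each JSONL line is one such object; the content below is invented, not taken from the actual dataset):

```yaml
# Hypothetical SFT record in the messages layout declared above.
messages:
  - role: system
    content: "You are Vess, a sardonic smuggler. Stay in character."
  - role: user
    content: "The dockmaster asks what we're hauling. What do you say?"
  - role: assistant
    content: "Vess pats the nearest crate without looking up. 'Machine parts. Mostly.'"
```

With `train_on_inputs: false` (set in the next hunk), only the assistant turns contribute to the training loss.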
@@ -272,44 +296,110 @@ datasets:
       assistant: ["assistant"]
       system: ["system"]
 
 test_datasets:
+  - path: ./validate_dataset.jsonl
+    type: chat_template
+    split: train
+    chat_template_strategy: tokenizer
+    field_messages: messages
+    message_property_mappings:
+      role: role
+      content: content
+    roles:
+      user: ["user"]
+      assistant: ["assistant"]
+      system: ["system"]
 
-train_on_inputs: false
-num_epochs: 2
-micro_batch_size: 4
-gradient_accumulation_steps: 2
-learning_rate: 1.5e-5
+dataset_prepared_path:
+train_on_inputs: false # Only train on assistant responses
 
+# ====================
+# QLORA CONFIGURATION
+# ====================
 adapter: qlora
 load_in_4bit: true
 lora_r: 64
 lora_alpha: 128
 lora_dropout: 0.1
 lora_target_linear: true
+# lora_modules_to_save: # Uncomment only if you added NEW tokens
 
+# ====================
+# TRAINING PARAMETERS
+# ====================
+num_epochs: 2
+micro_batch_size: 4
+gradient_accumulation_steps: 2
+learning_rate: 1.5e-5
+optimizer: paged_adamw_8bit
+lr_scheduler: rex
+warmup_ratio: 0.05
+weight_decay: 0.01
+max_grad_norm: 1.0
+
+# ====================
+# SEQUENCE & PACKING
+# ====================
 sequence_len: 8192
 sample_packing: true
+eval_sample_packing: false
 pad_to_sequence_len: true
 
+# ====================
+# HARDWARE OPTIMIZATIONS
+# ====================
 bf16: auto
 flash_attention: true
 gradient_checkpointing: true
+
+# ====================
+# EVALUATION & CHECKPOINTING
+# ====================
+evaluation_strategy: steps
+eval_steps: 5
+save_strategy: steps
+save_steps: 5
+save_total_limit: 5 # Keep best + last few checkpoints
+load_best_model_at_end: true
+metric_for_best_model: eval_loss
+greater_is_better: false
+early_stopping_patience: 5
+
+# ====================
+# LOGGING & OUTPUT
+# ====================
+output_dir: ./output_model
+logging_steps: 2
+save_safetensors: true
+
+# ====================
+# WANDB TRACKING
+# ====================
+wandb_project: project_name
+# wandb_entity: your_entity # Uncomment and set if needed
+# wandb_name: your_run_name # Uncomment and set if needed</pre>
+</div>
+<div class="config-subtitle">DPO Configuration:</div>
+<div class="config-block">
+<div class="config-line"></div>
+<pre class="config-code"># ====================
+# MODEL CONFIGURATION
+# ====================
+base_model: ApocalypseParty/unleashed-fulldata30
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+special_tokens: {}
 chat_template: tokenizer_default
 
+# ====================
+# RL/DPO CONFIGURATION
+# ====================
 rl: dpo
 rl_beta: 0.07
 
+# ====================
+# DATASET CONFIGURATION
+# ====================
 datasets:
   - path: ./dpo_cleaned-v3_deduplicated.jsonl
     type: chat_template.default
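The DPO dataset's field mappings continue past the hunk boundary (the unchanged lines are not shown), so the exact field names are not visible here. As a sketch only: DPO data of this kind pairs shared prompt turns with one preferred and one dispreferred completion, which matches the README's description (chosen samples curated from the SFT data, rejected samples generated by a weaker Llama 3.3 finetune). A hypothetical record in a common chosen/rejected shape:

```yaml
# Invented chosen/rejected pair; the actual field names used by
# ./dpo_cleaned-v3_deduplicated.jsonl are outside the shown hunks.
messages:   # shared prompt context
  - role: system
    content: "You are Vess, a sardonic smuggler. Stay in character."
  - role: user
    content: "Describe the cargo hold in two sentences."
chosen:     # high-quality, instruction-following completion
  role: assistant
  content: "Crates sit in uneven stacks, lashed down with fraying cord. The air smells of oil and cold metal."
rejected:   # completion that ignores the persona and the length constraint
  role: assistant
  content: "Sure! The cargo hold is where cargo is stored on a ship. It has crates, walls, and a floor. Let me know if you'd like more detail!"
```

`rl_beta: 0.07` scales the log-probability ratios in the DPO objective; a small value like this lets the policy drift further from the SFT reference for a given preference margin.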
@@ -323,33 +413,72 @@
       system: ["system"]
       user: ["user"]
       assistant: ["assistant"]
+dataset_prepared_path:
+train_on_inputs: false # Only train on assistant responses
 
-micro_batch_size: 4
-gradient_accumulation_steps: 2
-learning_rate: 2e-6
-
-# LoRA parameters
+# ====================
+# QLORA CONFIGURATION
+# ====================
 adapter: qlora
 load_in_4bit: true
 lora_r: 32
 lora_alpha: 64
 lora_dropout: 0.05
 lora_target_linear: true
+# lora_modules_to_save: # Uncomment only if you added NEW tokens
 
+# ====================
+# TRAINING PARAMETERS
+# ====================
+num_epochs: 1
+micro_batch_size: 4
+gradient_accumulation_steps: 2
+learning_rate: 2e-6
+optimizer: adamw_8bit
+lr_scheduler: cosine
+warmup_steps: 5
+weight_decay: 0.01
+max_grad_norm: 1.0
+
+# ====================
+# SEQUENCE CONFIGURATION
+# ====================
 sequence_len: 4096
 pad_to_sequence_len: true
 
+# ====================
+# HARDWARE OPTIMIZATIONS
+# ====================
 bf16: auto
+tf32: false
 flash_attention: true
 gradient_checkpointing: offload
 deepspeed: deepspeed_configs/zero1.json
+
+# ====================
+# CHECKPOINTING
+# ====================
+save_steps: 10
+save_total_limit: 10
+load_best_model_at_end: true
+metric_for_best_model: eval_loss
+greater_is_better: false
+
+# ====================
+# LOGGING & OUTPUT
+# ====================
+output_dir: ./dpo_model
+logging_steps: 2
+save_safetensors: true
+
+# ====================
+# WANDB TRACKING
+# ====================
+wandb_project: project_name
+# wandb_entity: your_entity # Uncomment and set if needed
+# wandb_name: your_run_name # Uncomment and set if needed</pre>
+</div>
 </div>
-<p>Full configurations are available in the repository for those interested in complete training details.</p>
 </div>
 </div>
 </div>
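Taken together, both runs train QLoRA adapters over a 4-bit base with an effective batch size of 8 sequences per device (micro_batch_size 4 × gradient_accumulation_steps 2), and the DPO run is additionally sharded via the referenced deepspeed_configs/zero1.json. Assuming a standard Axolotl install, configs like these are typically launched with `accelerate launch -m axolotl.cli.train config.yml` (newer Axolotl releases also provide `axolotl train config.yml`); the exact entry point varies by version and is not part of this commit.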