Delta-Vector committed on
Commit
007189f
·
verified ·
1 Parent(s): f2d0355

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +461 -0
README.md ADDED
@@ -0,0 +1,461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Queen's Dark Model Citadel</title>
7
+ <style>
8
+ @import url('https://fonts.googleapis.com/css2?family=MedievalSharp&family=UnifrakturMaguntia&family=Cinzel+Decorative:wght@400;700;900&display=swap');
9
+ :root {
10
+ --dark-purple: #1a001a;
11
+ --royal-purple: #3d0066;
12
+ --violet-crown: #6600cc;
13
+ --neon-amethyst: #9d00ff;
14
+ --witch-glow: #cc33ff;
15
+ --cursed-pink: #ff00ff;
16
+ --obsidian: #0d0010;
17
+ --blood-moon: #cc0066;
18
+ }
19
+ body {
20
+ background: radial-gradient(ellipse at center, var(--dark-purple) 0%, var(--obsidian) 100%);
21
+ color: #f0e6ff;
22
+ font-family: 'Cinzel Decorative', cursive;
23
+ margin: 0;
24
+ padding: 0;
25
+ overflow-x: hidden;
26
+ text-shadow: 0 0 5px var(--witch-glow);
27
+ animation: pulseBackground 30s infinite alternate;
28
+ }
29
+ @keyframes pulseBackground {
30
+ 0% { background-color: var(--dark-purple); }
31
+ 25% { background-color: #250025; }
32
+ 50% { background-color: #300030; }
33
+ 75% { background-color: #250025; }
34
+ 100% { background-color: var(--dark-purple); }
35
+ }
36
+ .container {
37
+ max-width: 900px;
38
+ margin: 0 auto;
39
+ padding: 2rem;
40
+ border: 3px solid var(--violet-crown);
41
+ border-radius: 0 0 20px 20px;
42
+ box-shadow: 0 0 30px var(--neon-amethyst), inset 0 0 20px rgba(157, 0, 255, 0.3);
43
+ background: linear-gradient(135deg, rgba(61, 0, 102, 0.2) 0%, rgba(26, 0, 26, 0.8) 100%);
44
+ position: relative;
45
+ overflow: hidden;
46
+ }
47
+ .container::before {
48
+ content: "";
49
+ position: absolute;
50
+ top: 0;
51
+ left: 0;
52
+ right: 0;
53
+ height: 5px;
54
+ background: linear-gradient(90deg, var(--blood-moon), var(--cursed-pink), var(--witch-glow), var(--neon-amethyst), var(--violet-crown));
55
+ animation: rainbowBorder 8s linear infinite;
56
+ }
57
+ @keyframes rainbowBorder {
58
+ 0% { background-position: 0% 50%; }
59
+ 100% { background-position: 100% 50%; }
60
+ }
61
+ h1 {
62
+ font-family: 'UnifrakturMaguntia', cursive;
63
+ color: var(--witch-glow);
64
+ text-align: center;
65
+ font-size: 4rem;
66
+ margin: 1rem 0 2rem;
67
+ text-shadow: 0 0 10px var(--cursed-pink), 0 0 20px var(--neon-amethyst);
68
+ animation: titleGlow 3s ease-in-out infinite alternate;
69
+ letter-spacing: 3px;
70
+ }
71
+ @keyframes titleGlow {
72
+ from { text-shadow: 0 0 10px var(--cursed-pink), 0 0 20px var(--neon-amethyst); }
73
+ to { text-shadow: 0 0 15px var(--blood-moon), 0 0 30px var(--witch-glow), 0 0 45px var(--violet-crown); }
74
+ }
75
+ h2 {
76
+ font-family: 'MedievalSharp', cursive;
77
+ color: var(--neon-amethyst);
78
+ border-bottom: 2px solid var(--violet-crown);
79
+ padding-bottom: 0.5rem;
80
+ margin-top: 2.5rem;
81
+ font-size: 2.2rem;
82
+ text-shadow: 0 0 8px rgba(157, 0, 255, 0.7);
83
+ position: relative;
84
+ }
85
+ h2::after {
86
+ content: "";
87
+ position: absolute;
88
+ bottom: -2px;
89
+ left: 0;
90
+ width: 100%;
91
+ height: 2px;
92
+ background: linear-gradient(90deg, transparent, var(--witch-glow), transparent);
93
+ animation: sectionUnderline 3s infinite;
94
+ }
95
+ @keyframes sectionUnderline {
96
+ 0% { transform: scaleX(0); opacity: 0; }
97
+ 50% { transform: scaleX(1); opacity: 1; }
98
+ 100% { transform: scaleX(0); opacity: 0; }
99
+ }
100
+ .section {
101
+ background: rgba(26, 0, 26, 0.5);
102
+ border-left: 4px solid var(--blood-moon);
103
+ padding: 1.5rem;
104
+ margin: 1.5rem 0;
105
+ border-radius: 0 10px 10px 0;
106
+ box-shadow: 5px 5px 15px rgba(0, 0, 0, 0.3);
107
+ transition: all 0.3s ease;
108
+ position: relative;
109
+ overflow: hidden;
110
+ }
111
+ .section:hover {
112
+ transform: translateX(5px);
113
+ box-shadow: 10px 10px 25px rgba(0, 0, 0, 0.5);
114
+ border-left: 4px solid var(--cursed-pink);
115
+ }
116
+ .section::before {
117
+ content: "";
118
+ position: absolute;
119
+ top: 0;
120
+ left: 0;
121
+ width: 100%;
122
+ height: 100%;
123
+ background: linear-gradient(45deg, transparent 65%, rgba(255, 0, 255, 0.05) 65%, rgba(255, 0, 255, 0.05) 70%, transparent 70%, transparent 85%, rgba(204, 51, 255, 0.05) 85%, rgba(204, 51, 255, 0.05) 90%, transparent 90%);
124
+ pointer-events: none;
125
+ }
126
+ pre {
127
+ background: rgba(61, 0, 102, 0.3);
128
+ border: 1px solid var(--violet-crown);
129
+ border-radius: 5px;
130
+ padding: 1rem;
131
+ overflow-x: auto;
132
+ font-family: monospace;
133
+ color: #e0c4ff;
134
+ text-shadow: 0 0 3px var(--witch-glow);
135
+ position: relative;
136
+ box-shadow: inset 0 0 10px rgba(157, 0, 255, 0.3);
137
+ }
138
+ pre::before {
139
+ content: "";
140
+ position: absolute;
141
+ top: 0;
142
+ left: 0;
143
+ right: 0;
144
+ height: 1px;
145
+ background: linear-gradient(90deg, transparent, var(--witch-glow), transparent);
146
+ }
147
+ pre::after {
148
+ content: "";
149
+ position: absolute;
150
+ bottom: 0;
151
+ left: 0;
152
+ right: 0;
153
+ height: 1px;
154
+ background: linear-gradient(90deg, transparent, var(--witch-glow), transparent);
155
+ }
156
+ code {
157
+ background: rgba(102, 0, 204, 0.2);
158
+ padding: 0.2rem 0.4rem;
159
+ border-radius: 3px;
160
+ font-family: monospace;
161
+ color: #f0d5ff;
162
+ border: 1px solid var(--neon-amethyst);
163
+ }
164
+ ul, ol {
165
+ padding-left: 1.5rem;
166
+ }
167
+ li {
168
+ margin-bottom: 0.5rem;
169
+ position: relative;
170
+ }
171
+ li::before {
172
+ content: "✧";
173
+ color: var(--witch-glow);
174
+ position: absolute;
175
+ left: -1.3rem;
176
+ animation: twinkle 2s infinite;
177
+ }
178
+ @keyframes twinkle {
179
+ 0% { opacity: 0.3; }
180
+ 50% { opacity: 1; }
181
+ 100% { opacity: 0.3; }
182
+ }
183
+ a {
184
+ color: var(--cursed-pink);
185
+ text-decoration: none;
186
+ position: relative;
187
+ transition: all 0.3s ease;
188
+ }
189
+ a:hover {
190
+ color: var(--witch-glow);
191
+ text-shadow: 0 0 8px var(--neon-amethyst);
192
+ }
193
+ a::after {
194
+ content: "";
195
+ position: absolute;
196
+ bottom: -2px;
197
+ left: 0;
198
+ width: 0;
199
+ height: 1px;
200
+ background: var(--witch-glow);
201
+ transition: width 0.3s ease;
202
+ }
203
+ a:hover::after {
204
+ width: 100%;
205
+ }
206
+ .floating-crystals {
207
+ position: absolute;
208
+ width: 100%;
209
+ height: 100%;
210
+ top: 0;
211
+ left: 0;
212
+ pointer-events: none;
213
+ z-index: -1;
214
+ overflow: hidden;
215
+ }
216
+ .crystal {
217
+ position: absolute;
218
+ width: 10px;
219
+ height: 10px;
220
+ background: rgba(204, 51, 255, 0.3);
221
+ transform: rotate(45deg);
222
+ animation: float 15s infinite linear;
223
+ }
224
+ @keyframes float {
225
+ 0% { transform: translateY(0) rotate(0deg); opacity: 0; }
226
+ 10% { opacity: 0.3; }
227
+ 90% { opacity: 0.3; }
228
+ 100% { transform: translateY(-100vh) rotate(360deg); opacity: 0; }
229
+ }
230
+ .sigil {
231
+ text-align: center;
232
+ margin: 2rem 0;
233
+ font-size: 3rem;
234
+ color: var(--witch-glow);
235
+ animation: sigilPulse 5s infinite;
236
+ }
237
+ @keyframes sigilPulse {
238
+ 0% { transform: scale(1); opacity: 0.7; }
239
+ 50% { transform: scale(1.1); opacity: 1; text-shadow: 0 0 15px var(--cursed-pink), 0 0 30px var(--neon-amethyst); }
240
+ 100% { transform: scale(1); opacity: 0.7; }
241
+ }
242
+ .tooltip {
243
+ position: relative;
244
+ display: inline-block;
245
+ cursor: help;
246
+ }
247
+ .tooltip .tooltiptext {
248
+ visibility: hidden;
249
+ width: 200px;
250
+ background-color: var(--royal-purple);
251
+ color: #fff;
252
+ text-align: center;
253
+ border-radius: 6px;
254
+ padding: 5px;
255
+ position: absolute;
256
+ z-index: 1;
257
+ bottom: 125%;
258
+ left: 50%;
259
+ margin-left: -100px;
260
+ opacity: 0;
261
+ transition: opacity 0.3s;
262
+ border: 1px solid var(--witch-glow);
263
+ box-shadow: 0 0 10px var(--neon-amethyst);
264
+ }
265
+ .tooltip:hover .tooltiptext {
266
+ visibility: visible;
267
+ opacity: 1;
268
+ }
269
+ .blood-drip {
270
+ position: absolute;
271
+ top: -20px;
272
+ right: 50px;
273
+ font-size: 3rem;
274
+ animation: drip 4s infinite;
275
+ }
276
+ @keyframes drip {
277
+ 0%, 100% { transform: translateY(0); }
278
+ 50% { transform: translateY(20px); }
279
+ }
280
+ @media (max-width: 768px) {
281
+ h1 { font-size: 2.5rem; }
282
+ .container { padding: 1rem; }
283
+ }
284
+ </style>
285
+ </head>
286
+ <body>
287
+ <div class="floating-crystals">
288
+ <div class="crystal" style="left: 10%; animation-delay: 0s; animation-duration: 20s;"></div>
289
+ <div class="crystal" style="left: 20%; animation-delay: 2s; animation-duration: 18s;"></div>
290
+ <div class="crystal" style="left: 30%; animation-delay: 4s; animation-duration: 22s;"></div>
291
+ <div class="crystal" style="left: 40%; animation-delay: 1s; animation-duration: 17s;"></div>
292
+ <div class="crystal" style="left: 50%; animation-delay: 3s; animation-duration: 19s;"></div>
293
+ <div class="crystal" style="left: 60%; animation-delay: 5s; animation-duration: 25s;"></div>
294
+ <div class="crystal" style="left: 70%; animation-delay: 0.5s; animation-duration: 16s;"></div>
295
+ <div class="crystal" style="left: 80%; animation-delay: 2.5s; animation-duration: 21s;"></div>
296
+ <div class="crystal" style="left: 90%; animation-delay: 3.5s; animation-duration: 23s;"></div>
297
+ </div>
298
+ <div class="container">
299
+ <div class="blood-drip">🩸</div>
300
+ <h1>Queen's Dark Model Citadel</h1>
301
+ <div class="sigil">⚜️</div>
302
+ <div class="section">
303
+ <h2>1. Details</h2>
304
+ <p>This <span class="tooltip">model<span class="tooltiptext">A dark artifact of machine learning</span></span> was forged in the <span class="tooltip">cauldron<span class="tooltiptext">Our GPU cluster</span></span> of the <span class="tooltip">Dark Queen<span class="tooltiptext">The mysterious creator</span></span> using forbidden knowledge and arcane techniques.</p>
305
+ <ul>
306
+ <li><strong>Model Type:</strong> <code>DarkSorceressForge-v2</code></li>
307
+ <li><strong>Architecture:</strong> <code>NecroTransformer-XL</code></li>
308
+ <li><strong>Parameters:</strong> <code>13.7B</code> (unlucky number for enemies)</li>
309
+ <li><strong>Precision:</strong> <code>BF16</code> (BloodFloat 16)</li>
310
+ <li><strong>Context Window:</strong> <code>4096</code> tokens (enough for long curses)</li>
311
+ <li><strong>License:</strong> <code>DarkArts-1.0</code></li>
312
+ </ul>
313
+ </div>
314
+ <div class="section">
315
+ <h2>2. Training Details</h2>
316
+ <p>The model was trained on a <span class="tooltip">sacrificial altar<span class="tooltiptext">8x A100 80GB GPUs</span></span> using the following dark rituals:</p>
317
+ <ul>
318
+ <li><strong>Dataset:</strong> <code>LibraryOfShadows-v5</code> (3.2T tokens of arcane knowledge)</li>
319
+ <li><strong>Training Steps:</strong> <code>666,666</code> (of course)</li>
320
+ <li><strong>Batch Size:</strong> <code>13</code> (another unlucky number)</li>
321
+ <li><strong>Learning Rate:</strong> <code>5e-5</code> (with dark magic scheduling)</li>
322
+ <li><strong>Warmup:</strong> <code>666</code> steps (to summon the gradient demons)</li>
323
+ <li><strong>Optimizer:</strong> <code>DarkAdamW</code> (β₁=0.9, β₂=0.999, ε=1e-8)</li>
324
+ <li><strong>Weight Decay:</strong> <code>0.01</code> (to prevent overconfidence)</li>
325
+ <li><strong>Gradient Clipping:</strong> <code>1.0</code> (to tame the dark energies)</li>
326
+ </ul>
327
+ <pre><code>{
328
+ "training_config": {
329
+ "dark_magic": true,
330
+ "sacrificial_gpus": 8,
331
+ "training_time": "13 days 13 hours 13 minutes",
332
+ "final_loss": 1.337,
333
+ "dark_incantations": ["flash_attention", "gradient_checkpointing", "deepspeed_stage_3"],
334
+ "forbidden_techniques": ["lora", "qlora", "paged_adamw"]
335
+ }
336
+ }</code></pre>
337
+ </div>
338
+ <div class="section">
339
+ <h2>3. Chat Template</h2>
340
+ <p>To commune with the model, use the following <span class="tooltip">incantation format<span class="tooltiptext">ChatML format with dark modifications</span></span>:</p>
341
+ <pre><code>{% for message in messages %}
342
+ {% if message['role'] == 'user' %}
343
+ <|user|>{{ message['content'] }}<|end|>
344
+ {% elif message['role'] == 'assistant' %}
345
+ <|assistant|>{{ message['content'] }}<|end|>
346
+ {% elif message['role'] == 'system' %}
347
+ <|system|>{{ message['content'] }}<|end|>
348
+ {% endif %}
349
+ {% endfor %}
350
+ <|assistant|></code></pre>
351
+ <p>Sampler/System prompt:</p>
352
+ <pre><code>from dark_magic import InvokeModel
353
+ ritual = InvokeModel(
354
+ temperature=0.7,
355
+ top_p=0.9,
356
+ max_new_tokens=666,
357
+ repetition_penalty=1.1,
358
+ do_sample=True
359
+ )
360
+ response = ritual.speak(
361
+ "Tell me of the ancient curses...",
362
+ model="DarkQueen/forbidden-knowledge-v13.7b"
363
+ )</code></pre>
364
+ </div>
365
+ <div class="section">
366
+ <h2>4. Trainer Config</h2>
367
+ <p>The dark rites of training were conducted with this configuration:</p>
368
+ <pre><code>
369
+ base_model: NewEden/MistralAI-Nemo-Instruct-ChatML
370
+ model_type: AutoModelForCausalLM
371
+ tokenizer_type: AutoTokenizer
372
+ load_in_8bit: false
373
+ load_in_4bit: false
374
+ strict: false
375
+ datasets:
376
+ - path: PocketDoc/Dans-Personamaxx-Logs
377
+ type: dan-chat-advanced
378
+ - path: anthracite-org/kalo-opus-instruct-22k-no-refusal
379
+ type: dan-chat-advanced
380
+ - path: lodrick-the-lafted/kalo-opus-instruct-3k-filtered
381
+ type: dan-chat-advanced
382
+ - path: anthracite-org/nopm_claude_writing_fixed
383
+ type: dan-chat-advanced
384
+ - path: anthracite-org/kalo_opus_misc_240827
385
+ type: dan-chat-advanced
386
+ - path: anthracite-org/kalo_misc_part2
387
+ type: dan-chat-advanced
388
+ - path: NewEden/Claude-Instruct-5K
389
+ type: dan-chat-advanced
390
+ - path: NewEden/Claude-Instruct-2.7K
391
+ type: dan-chat-advanced
392
+ shuffle_merged_datasets: true
393
+ dataset_prepared_path: dataset_preparedss
394
+ val_set_size: 0.0
395
+ output_dir: 12b-out-0001-max_grad_norm
396
+ plugins:
397
+ - axolotl.integrations.liger.LigerPlugin
398
+ liger_rope: true
399
+ liger_rms_norm: true
400
+ liger_layer_norm: true
401
+ liger_glu_activation: true
402
+ liger_fused_linear_cross_entropy: true
403
+ sequence_len: 32768
404
+ sample_packing: true
405
+ eval_sample_packing: false
406
+ pad_to_sequence_len: true
407
+ max_grad_norm: 0.001
408
+ wandb_project: Rei-V2
409
+ wandb_entity:
410
+ wandb_watch:
411
+ wandb_name: 0.001_max-grad-norm-4gpu_next
412
+ wandb_log_model:
413
+ evals_per_epoch: 0
414
+ eval_table_size:
415
+ eval_max_new_tokens: 128
416
+ gradient_accumulation_steps: 1
417
+ micro_batch_size: 4
418
+ num_epochs: 2
419
+ optimizer: adamw_bnb_8bit
420
+ lr_scheduler: cosine
421
+ learning_rate: 2e-6
422
+ train_on_inputs: false
423
+ group_by_length: false
424
+ bf16: auto
425
+ fp16:
426
+ tf32: false
427
+ gradient_checkpointing: true
428
+ early_stopping_patience:
429
+ resume_from_checkpoint:
430
+ local_rank:
431
+ logging_steps: 1
432
+ xformers_attention:
433
+ flash_attention: true
434
+ s2_attention:
435
+ warmup_steps: 40
436
+ saves_per_epoch: 2
437
+ debug:
438
+ deepspeed: ./deepspeed_configs/zero3_bf16.json
439
+ weight_decay: 0.0001
440
+ fsdp:
441
+ fsdp_config:
442
+ special_tokens:
443
+ pad_token: &lt;pad&gt;
444
+ </code></pre>
445
+ </div>
446
+ <div class="section">
447
+ <h2>5. Credits</h2>
448
+ <p>This model was created by the <span class="tooltip">Delta-Vector<span class="tooltiptext">In the middle of the night</span></span> with contributions from:</p>
449
+ <ul>
450
+ <li>The <span class="tooltip">LucyKnada<span class="tooltiptext">Shadow Goblin</span></span></li>
451
+ <li>The <span class="tooltip">Kubernetes_Bad<span class="tooltiptext">Hardware to run inference on the model</span></span></li>
452
+ <li>The <span class="tooltip">Intervitens<span class="tooltiptext">For providing hardware for previous training runs</span></span></li>
453
+ </ul>
454
+ <p>Special thanks to:</p>
455
+ <ul>
456
+ <li>The <span class="tooltip">Anthracite<span class="tooltiptext">For the support.</span></span></li>
457
+ <li>The <span class="tooltip">Pygmalion<span class="tooltiptext">For their feedback.</span></span></li>
458
+ </ul>
459
+ </div></div>
460
+ </body>
461
+ </html>