Delta-Vector committed · Commit 92f4065 (verified) · Parent: ac9d957

Update README.md

Files changed (1): README.md (+35 -10)
README.md CHANGED
@@ -190,6 +190,8 @@ details summary:hover {
 - Amount of Tokens: 500 Million
 </div>
 
+
+
 </div>
 
 <div style="border: 2px solid #6e00ff; border-radius: 10px; padding: 20px; margin: 20px 0; box-shadow: 0 0 15px rgba(110, 0, 255, 0.5);">
@@ -197,30 +199,45 @@
 ## Axolotl Config ꒰(˶• ᴗ •˶)꒱
 
 <details>
-```
-base_model: NewEden_Phi4-PT-merged
+
+```yaml
+base_model: NewEden_Phi-PT-merged-LIT
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+
 plugins:
   - axolotl.integrations.liger.LigerPlugin
 liger_rope: true
 liger_rms_norm: true
 liger_swiglu: true
 liger_fused_linear_cross_entropy: true
+
+
 load_in_8bit: false
 load_in_4bit: false
 strict: false
+
 datasets:
-  - path: NewEden/Orion-LIT
-    type: completion
-    field: text
+  - path: PocketDoc/Dans-MemoryCore-CoreCurriculum-Small
+    type: dan-chat-advanced
+  - path: Nitral-AI/ARES-ShareGPT
+    type: dan-chat-advanced
+  - path: Gryphe/Sonnet3.5-SlimOrcaDedupCleaned-20k
+    type: dan-chat-advanced
+  - path: NewEden/Claude-Instruct-2.7K
+    type: dan-chat-advanced
+  - path: NewEden/Claude-Instruct-5K
+    type: dan-chat-advanced
+
 shuffle_merged_datasets: true
 dataset_prepared_path: prepared_data
 val_set_size: 0.0
-output_dir: ./phi4-ptv2-out-r1
+output_dir: ./phi4-inst-out-r2
+
 sequence_len: 16384
 sample_packing: true
 pad_to_sequence_len: true
+
 adapter: lora
 lora_model_dir:
 lora_r: 128
@@ -234,25 +251,31 @@ lora_target_modules:
   - v_proj
   - k_proj
   - o_proj
+
 lora_modules_to_save:
   - embed_tokens
   - lm_head
+
+
 wandb_project: mag-phi
 wandb_entity:
 wandb_watch:
-wandb_name: comp-v2-attempt-01
+wandb_name: inst-attempt-02
 wandb_log_model:
+
 gradient_accumulation_steps: 4
 micro_batch_size: 2
-num_epochs: 1
+num_epochs: 4
 optimizer: paged_ademamix_8bit
 lr_scheduler: cosine
-learning_rate: 0.00002
+learning_rate: 0.000025
+
 train_on_inputs: false
 group_by_length: false
 bf16: auto
 fp16:
 tf32: false
+
 gradient_checkpointing: unsloth
 early_stopping_patience:
 resume_from_checkpoint:
@@ -260,17 +283,19 @@ local_rank:
 logging_steps: 1
 xformers_attention:
 flash_attention: true
+
 warmup_steps: 15
 evals_per_epoch: 4
 eval_table_size:
 eval_max_new_tokens: 128
-saves_per_epoch: 4
+saves_per_epoch: 2
 debug:
 deepspeed: /workspace/axolotl/deepspeed_configs/zero3_bf16_cpuoffload_params.json
 weight_decay: 0.01
 fsdp:
 fsdp_config:
 ```
+
 </details>
 </div>
 
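For anyone wanting to try the checkpoint this config produces, below is a minimal sketch (not part of this commit) of loading the trained LoRA adapter on top of the base model with `transformers` and `peft`. The paths simply mirror the `base_model` and `output_dir` values from the config and are assumptions about the local filesystem layout.

```python
# Hedged sketch, not from this commit: attach the LoRA adapter trained with the
# config above to its base model for inference. Paths are assumptions taken from
# base_model / output_dir in the YAML.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("NewEden_Phi-PT-merged-LIT", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("NewEden_Phi-PT-merged-LIT")

# Load the adapter (lora_modules_to_save restores embed_tokens and lm_head as well),
# then fold the LoRA weights into the base model for plain inference.
model = PeftModel.from_pretrained(base, "./phi4-inst-out-r2")
model = model.merge_and_unload()
```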