ThomasTheMaker committed on
Commit
878414f
·
verified ·
1 Parent(s): 607bbef

Upload qwen3-0.6B-1000data.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. qwen3-0.6B-1000data.yaml +46 -0
qwen3-0.6B-1000data.yaml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### model
2
+ model_name_or_path: Qwen/Qwen3-0.6B
3
+ quantization_bit: 4 # choices: [8 (bnb/hqq/eetq), 4 (bnb/hqq), 3 (hqq), 2 (hqq)]
4
+ quantization_method: bnb # choices: [bnb, hqq, eetq]
5
+ trust_remote_code: true
6
+
7
+ ### method
8
+ stage: sft
9
+ do_train: true
10
+ finetuning_type: lora
11
+ lora_rank: 8
12
+ lora_target: all
13
+
14
+ ### dataset
15
+ dataset: alpaca_en_demo
16
+ template: qwen3
17
+ cutoff_len: 2048
18
+ max_samples: 1000
19
+ overwrite_cache: true
20
+ preprocessing_num_workers: 16
21
+ dataloader_num_workers: 4
22
+
23
+ ### output
24
+ output_dir: saves/qwen3-0.6b/lora/sft
25
+ logging_steps: 10
26
+ save_steps: 500
27
+ plot_loss: true
28
+ overwrite_output_dir: true
29
+ save_only_model: false
30
+ report_to: none # choices: [none, wandb, tensorboard, swanlab, mlflow]
31
+
32
+ ### train
33
+ per_device_train_batch_size: 1
34
+ gradient_accumulation_steps: 8
35
+ learning_rate: 1.0e-4
36
+ num_train_epochs: 3.0
37
+ lr_scheduler_type: cosine
38
+ warmup_ratio: 0.1
39
+ bf16: true
40
+ ddp_timeout: 180000000
41
+
42
+ ### eval
43
+ # val_size: 0.1
44
+ # per_device_eval_batch_size: 1
45
+ # eval_strategy: steps
46
+ # eval_steps: 500