souging committed (verified)
Commit 5dcce53 · 1 Parent(s): 5762502

End of training

Files changed (2):
  1. README.md +28 -28
  2. adapter_model.bin +2 -2
README.md CHANGED
@@ -1,12 +1,12 @@
 ---
 library_name: peft
 license: apache-2.0
-base_model: unsloth/SmolLM2-360M-Instruct
+base_model: NousResearch/Yarn-Solar-10b-32k
 tags:
 - axolotl
 - generated_from_trainer
 model-index:
-- name: bece4e2a-1acf-4d5f-9255-9dc0500ce188
+- name: bb4b141f-2c27-4c46-88a8-b7997d6eb489
   results: []
 ---
 
@@ -19,20 +19,19 @@ should probably proofread and complete it, then remove this comment. -->
 axolotl version: `0.4.1`
 ```yaml
 adapter: lora
-base_model: unsloth/SmolLM2-360M-Instruct
+base_model: NousResearch/Yarn-Solar-10b-32k
 bf16: auto
 dataset_prepared_path: null
 datasets:
 - data_files:
-  - 65b7b1b934182bde_train_data.json
+  - 2791f4c87cd58c7c_train_data.json
   ds_type: json
   format: custom
-  path: /root/G.O.D-test/core/data/65b7b1b934182bde_train_data.json
+  path: /root/G.O.D-test/core/data/2791f4c87cd58c7c_train_data.json
   type:
-    field_input: input
     field_instruction: instruction
     field_output: output
-    format: '{instruction} {input}'
+    format: '{instruction}'
   no_input_format: '{instruction}'
   system_format: '{system}'
   system_prompt: ''
@@ -42,13 +41,13 @@ eval_max_new_tokens: 128
 eval_steps: 0
 evals_per_epoch: null
 flash_attention: true
-fp16: false
+fp16: null
 fsdp: null
 fsdp_config: null
-gradient_accumulation_steps: 4
+gradient_accumulation_steps: 6
 gradient_checkpointing: false
 group_by_length: false
-hub_model_id: souging/bece4e2a-1acf-4d5f-9255-9dc0500ce188
+hub_model_id: souging/bb4b141f-2c27-4c46-88a8-b7997d6eb489
 hub_repo: null
 hub_strategy: checkpoint
 hub_token: null
@@ -64,9 +63,9 @@ lora_model_dir: null
 lora_r: 32
 lora_target_linear: true
 lr_scheduler: cosine
-max_steps: 500
-micro_batch_size: 10
-mlflow_experiment_name: /tmp/65b7b1b934182bde_train_data.json
+max_steps: 700
+micro_batch_size: 1
+mlflow_experiment_name: /tmp/2791f4c87cd58c7c_train_data.json
 model_type: AutoModelForCausalLM
 num_epochs: 10
 optimizer: adamw_bnb_8bit
@@ -77,8 +76,9 @@ s2_attention: null
 sample_packing: false
 save_steps: 0
 saves_per_epoch: null
-seed: 20
-sequence_len: 2048
+sequence_len: 1920
+special_tokens:
+  pad_token: </s>
 strict: false
 tf32: false
 tokenizer_type: AutoTokenizer
@@ -86,21 +86,21 @@ train_on_inputs: false
 trust_remote_code: true
 wandb_entity: null
 wandb_mode: online
-wandb_name: 42d66002-df23-4eff-b50f-ada526fd7375
+wandb_name: 0d6c6775-16be-47ce-928c-5e74f826d12a
 wandb_project: Gradients-On-Demand
 wandb_run: your_name
-wandb_runid: 42d66002-df23-4eff-b50f-ada526fd7375
+wandb_runid: 0d6c6775-16be-47ce-928c-5e74f826d12a
 warmup_steps: 100
-weight_decay: 0.0
-xformers_attention: false
+weight_decay: 0.01
+xformers_attention: null
 
 ```
 
 </details><br>
 
-# bece4e2a-1acf-4d5f-9255-9dc0500ce188
+# bb4b141f-2c27-4c46-88a8-b7997d6eb489
 
-This model is a fine-tuned version of [unsloth/SmolLM2-360M-Instruct](https://huggingface.co/unsloth/SmolLM2-360M-Instruct) on the None dataset.
+This model is a fine-tuned version of [NousResearch/Yarn-Solar-10b-32k](https://huggingface.co/NousResearch/Yarn-Solar-10b-32k) on the None dataset.
 
 ## Model description
 
@@ -120,18 +120,18 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.000202
-- train_batch_size: 10
-- eval_batch_size: 10
-- seed: 20
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
 - distributed_type: multi-GPU
 - num_devices: 8
-- gradient_accumulation_steps: 4
-- total_train_batch_size: 320
-- total_eval_batch_size: 80
+- gradient_accumulation_steps: 6
+- total_train_batch_size: 48
+- total_eval_batch_size: 8
 - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 100
-- training_steps: 500
+- training_steps: 700
 
 ### Training results

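The total batch sizes reported in the updated hyperparameters follow directly from the per-device settings in the new config. A minimal sketch of the arithmetic, using the values from the diff above (the variable names are illustrative, not part of the training code):

```python
# Values taken from the updated config in the diff above; names are illustrative only.
micro_batch_size = 1               # per-device train batch size
gradient_accumulation_steps = 6
num_devices = 8

# Effective train batch size = per-device batch * accumulation steps * device count.
total_train_batch_size = micro_batch_size * gradient_accumulation_steps * num_devices
assert total_train_batch_size == 48   # matches total_train_batch_size in the README

# Evaluation does not accumulate gradients, so only the device count multiplies it.
total_eval_batch_size = micro_batch_size * num_devices
assert total_eval_batch_size == 8     # matches total_eval_batch_size in the README
```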
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abd2868dbb0e5c7bf55bea07e85af59be50add159ee543d2e717a25d30ce4a96
-size 69629450
+oid sha256:f3994fdea994ac2ad5f46d32b8e6e5211aff3e00b064b989500712c0b21ef5ea
+size 503559370
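Since the README declares `library_name: peft` and this commit replaces `adapter_model.bin` with a new LoRA adapter trained against NousResearch/Yarn-Solar-10b-32k, the adapter would normally be applied on top of that base model at inference time. Below is a minimal sketch, assuming the standard `transformers` + `peft` loading path and the repo ids shown in the diff; the prompt and generation settings are illustrative only, not taken from this repository.

```python
# Sketch: load the LoRA adapter from this commit on top of its base model.
# Assumes standard transformers + peft APIs; repo ids come from the config above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "NousResearch/Yarn-Solar-10b-32k"
adapter_id = "souging/bb4b141f-2c27-4c46-88a8-b7997d6eb489"

tokenizer = AutoTokenizer.from_pretrained(base_id, trust_remote_code=True)
base = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype=torch.bfloat16, trust_remote_code=True
)
model = PeftModel.from_pretrained(base, adapter_id)  # attaches adapter_model.bin

prompt = "Explain gradient accumulation in one sentence."
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```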