yalhessi committed
Commit f67aa01 · verified · 1 Parent(s): c4e5d35

End of training

README.md ADDED
@@ -0,0 +1,122 @@
+ ---
+ library_name: peft
+ license: llama3.2
+ base_model: meta-llama/Llama-3.2-1B
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: lemexp-task1-lemma_command_small-Llama-3.2-1B-ddp-8lr
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # lemexp-task1-lemma_command_small-Llama-3.2-1B-ddp-8lr
+
+ This model is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.6416
+
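Below is a minimal loading sketch for this adapter. It assumes the adapter is published as `yalhessi/lemexp-task1-lemma_command_small-Llama-3.2-1B-ddp-8lr` (inferred from this card's name) and that you have access to the gated `meta-llama/Llama-3.2-1B` base model; adjust the repo id, device placement, and prompt to your setup.

```python
# Hedged loading sketch: the adapter repo id is an assumption based on this card's
# name, and the prompt is a placeholder, not an example from the training data.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Llama-3.2-1B"
adapter_id = "yalhessi/lemexp-task1-lemma_command_small-Llama-3.2-1B-ddp-8lr"  # assumed repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id)

# Attach the LoRA adapter weights on top of the frozen base model.
model = PeftModel.from_pretrained(base_model, adapter_id)
model.eval()

inputs = tokenizer("example prompt", return_tensors="pt")  # placeholder prompt
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```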
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training (a configuration sketch follows the list):
+ - learning_rate: 0.0008
+ - train_batch_size: 2
+ - eval_batch_size: 2
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - total_train_batch_size: 16
+ - total_eval_batch_size: 16
+ - optimizer: AdamW (torch) with betas=(0.9, 0.999), epsilon=1e-08, and no additional optimizer arguments
+ - lr_scheduler_type: linear
+ - num_epochs: 12
+ - mixed_precision_training: Native AMP
+
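As a rough guide, these values map onto `transformers.TrainingArguments` roughly as in the sketch below. The output directory is a placeholder, and the dataset, tokenization, and collator setup are not shown because the training data is not documented on this card.

```python
# Hedged sketch of a matching training configuration: learning rate 8e-4,
# per-device batch size 2 across 8 GPUs (effective batch size 16), linear
# schedule, 12 epochs, AdamW, and fp16 autocast ("Native AMP").
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="lemexp-task1-lemma_command_small-Llama-3.2-1B-ddp-8lr",  # placeholder path
    learning_rate=8e-4,
    per_device_train_batch_size=2,   # x 8 devices -> total_train_batch_size 16
    per_device_eval_batch_size=2,
    seed=42,
    num_train_epochs=12,
    lr_scheduler_type="linear",
    optim="adamw_torch",
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    fp16=True,                       # mixed precision via native AMP
)
```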
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-------:|:-----:|:---------------:|
+ | No log | 0.2003 | 461 | 1.0440 |
+ | 1.128 | 0.4005 | 922 | 0.9841 |
+ | 1.0276 | 0.6008 | 1383 | 0.9573 |
+ | 0.9831 | 0.8010 | 1844 | 0.9383 |
+ | 0.9507 | 1.0013 | 2305 | 0.9198 |
+ | 0.9196 | 1.2016 | 2766 | 0.8958 |
+ | 0.8816 | 1.4018 | 3227 | 0.9029 |
+ | 0.8802 | 1.6021 | 3688 | 0.8764 |
+ | 0.8751 | 1.8023 | 4149 | 0.8647 |
+ | 0.865 | 2.0026 | 4610 | 0.8444 |
+ | 0.8264 | 2.2029 | 5071 | 0.8443 |
+ | 0.8188 | 2.4031 | 5532 | 0.8321 |
+ | 0.8188 | 2.6034 | 5993 | 0.8318 |
+ | 0.8145 | 2.8036 | 6454 | 0.8210 |
+ | 0.8142 | 3.0039 | 6915 | 0.8134 |
+ | 0.7998 | 3.2042 | 7376 | 0.8051 |
+ | 0.7759 | 3.4044 | 7837 | 0.8013 |
+ | 0.764 | 3.6047 | 8298 | 0.7852 |
+ | 0.764 | 3.8050 | 8759 | 0.7829 |
+ | 0.7626 | 4.0052 | 9220 | 0.7777 |
+ | 0.744 | 4.2055 | 9681 | 0.7865 |
+ | 0.7304 | 4.4057 | 10142 | 0.7671 |
+ | 0.7307 | 4.6060 | 10603 | 0.7595 |
+ | 0.7313 | 4.8063 | 11064 | 0.7644 |
+ | 0.7267 | 5.0065 | 11525 | 0.7642 |
+ | 0.7267 | 5.2068 | 11986 | 0.7450 |
+ | 0.6842 | 5.4070 | 12447 | 0.7489 |
+ | 0.6944 | 5.6073 | 12908 | 0.7354 |
+ | 0.6957 | 5.8076 | 13369 | 0.7272 |
+ | 0.6855 | 6.0078 | 13830 | 0.7318 |
+ | 0.6801 | 6.2081 | 14291 | 0.7291 |
+ | 0.6528 | 6.4083 | 14752 | 0.7155 |
+ | 0.6551 | 6.6086 | 15213 | 0.7167 |
+ | 0.6576 | 6.8089 | 15674 | 0.7161 |
+ | 0.6561 | 7.0091 | 16135 | 0.7076 |
+ | 0.6262 | 7.2094 | 16596 | 0.7111 |
+ | 0.627 | 7.4096 | 17057 | 0.7074 |
+ | 0.624 | 7.6099 | 17518 | 0.6987 |
+ | 0.624 | 7.8102 | 17979 | 0.6931 |
+ | 0.6178 | 8.0104 | 18440 | 0.6892 |
+ | 0.6116 | 8.2107 | 18901 | 0.6908 |
+ | 0.5815 | 8.4109 | 19362 | 0.6831 |
+ | 0.5887 | 8.6112 | 19823 | 0.6758 |
+ | 0.5823 | 8.8115 | 20284 | 0.6793 |
+ | 0.5885 | 9.0117 | 20745 | 0.6718 |
+ | 0.5636 | 9.2120 | 21206 | 0.6703 |
+ | 0.5485 | 9.4123 | 21667 | 0.6666 |
+ | 0.5569 | 9.6125 | 22128 | 0.6596 |
+ | 0.5534 | 9.8128 | 22589 | 0.6519 |
+ | 0.5537 | 10.0130 | 23050 | 0.6631 |
+ | 0.5146 | 10.2133 | 23511 | 0.6657 |
+ | 0.5146 | 10.4136 | 23972 | 0.6550 |
+ | 0.5212 | 10.6138 | 24433 | 0.6490 |
+ | 0.5179 | 10.8141 | 24894 | 0.6483 |
+ | 0.5234 | 11.0143 | 25355 | 0.6498 |
+ | 0.5 | 11.2146 | 25816 | 0.6494 |
+ | 0.4811 | 11.4149 | 26277 | 0.6499 |
+ | 0.4849 | 11.6151 | 26738 | 0.6448 |
+ | 0.497 | 11.8154 | 27199 | 0.6416 |
+
+
+ ### Framework versions
+
+ - PEFT 0.14.0
+ - Transformers 4.47.0
+ - PyTorch 2.5.1+cu124
+ - Datasets 3.2.0
+ - Tokenizers 0.21.0
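To check whether a local environment matches these versions, something like the following can be used:

```python
# Print the locally installed versions of the libraries listed above.
import datasets
import peft
import tokenizers
import torch
import transformers

for name, module in [
    ("PEFT", peft),
    ("Transformers", transformers),
    ("PyTorch", torch),
    ("Datasets", datasets),
    ("Tokenizers", tokenizers),
]:
    print(f"{name} {module.__version__}")
```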
adapter_config.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "meta-llama/Llama-3.2-1B",
+   "bias": "none",
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 32,
+   "lora_bias": false,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
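For reference, these adapter settings correspond to a `peft.LoraConfig` along the lines of the sketch below: rank 8, alpha 32, dropout 0.05, with LoRA applied to the attention query/value projections. Only the fields visible in the JSON above are reflected; the training loop itself is not shown.

```python
# Hedged sketch: reconstructing the LoraConfig implied by adapter_config.json.
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

lora_config = LoraConfig(
    r=8,                                  # LoRA rank
    lora_alpha=32,                        # scaling factor
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],  # attention query/value projections
    bias="none",
    task_type="CAUSAL_LM",
)

base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")
peft_model = get_peft_model(base_model, lora_config)
peft_model.print_trainable_parameters()  # only the LoRA matrices are trainable
```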
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:16fdbd6dfca568a670acb962bba756ca7361330ac6bda97c9a1e7798c67ace64
+ size 1054097832
loss_plot.png ADDED
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9c1ad668d2dd7964f285921b32e8b935a7c49980c1a495018547599f90f168f0
+ size 5432