End of training

Browse files

Files changed (4) hide show

README.md +47 -1
adapter_config.json +4 -4
adapter_model.safetensors +2 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -15,6 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
 # build_your_circuit_lora
 This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on an unknown dataset.
 ## Model description
@@ -42,10 +44,54 @@ The following hyperparameters were used during training:
 - num_epochs: 20
 - mixed_precision_training: Native AMP
 ### Framework versions
 - PEFT 0.15.2
-- Transformers 4.52.4
 - Pytorch 2.6.0+cu124
 - Datasets 3.6.0
 - Tokenizers 0.21.2

 # build_your_circuit_lora
 This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.9179
 ## Model description
 - num_epochs: 20
 - mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch   | Step  | Validation Loss |
+|:-------------:|:-------:|:-----:|:---------------:|
+| 5.6558        | 0.5263  | 500   | 2.7776          |
+| 2.7422        | 1.0526  | 1000  | 1.9060          |
+| 2.1288        | 1.5789  | 1500  | 1.6338          |
+| 1.863         | 2.1053  | 2000  | 1.4802          |
+| 1.6722        | 2.6316  | 2500  | 1.3769          |
+| 1.5777        | 3.1579  | 3000  | 1.2927          |
+| 1.4825        | 3.6842  | 3500  | 1.2269          |
+| 1.43          | 4.2105  | 4000  | 1.1850          |
+| 1.3737        | 4.7368  | 4500  | 1.1518          |
+| 1.323         | 5.2632  | 5000  | 1.1274          |
+| 1.2881        | 5.7895  | 5500  | 1.0999          |
+| 1.2751        | 6.3158  | 6000  | 1.0804          |
+| 1.2417        | 6.8421  | 6500  | 1.0641          |
+| 1.2058        | 7.3684  | 7000  | 1.0429          |
+| 1.1967        | 7.8947  | 7500  | 1.0309          |
+| 1.1881        | 8.4211  | 8000  | 1.0186          |
+| 1.1615        | 8.9474  | 8500  | 1.0139          |
+| 1.1486        | 9.4737  | 9000  | 0.9940          |
+| 1.139         | 10.0    | 9500  | 0.9825          |
+| 1.1334        | 10.5263 | 10000 | 0.9786          |
+| 1.1079        | 11.0526 | 10500 | 0.9731          |
+| 1.1141        | 11.5789 | 11000 | 0.9648          |
+| 1.1053        | 12.1053 | 11500 | 0.9613          |
+| 1.0943        | 12.6316 | 12000 | 0.9539          |
+| 1.0776        | 13.1579 | 12500 | 0.9502          |
+| 1.1101        | 13.6842 | 13000 | 0.9415          |
+| 1.0635        | 14.2105 | 13500 | 0.9373          |
+| 1.0527        | 14.7368 | 14000 | 0.9371          |
+| 1.0926        | 15.2632 | 14500 | 0.9317          |
+| 1.0639        | 15.7895 | 15000 | 0.9310          |
+| 1.0445        | 16.3158 | 15500 | 0.9272          |
+| 1.0672        | 16.8421 | 16000 | 0.9260          |
+| 1.0519        | 17.3684 | 16500 | 0.9227          |
+| 1.0581        | 17.8947 | 17000 | 0.9203          |
+| 1.0365        | 18.4211 | 17500 | 0.9193          |
+| 1.0595        | 18.9474 | 18000 | 0.9178          |
+| 1.0449        | 19.4737 | 18500 | 0.9179          |
+| 1.0478        | 20.0    | 19000 | 0.9179          |
 ### Framework versions
 - PEFT 0.15.2
+- Transformers 4.55.4
 - Pytorch 2.6.0+cu124
 - Datasets 3.6.0
 - Tokenizers 0.21.2

adapter_config.json CHANGED Viewed

@@ -13,21 +13,21 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 32,
   "lora_bias": false,
   "lora_dropout": 0.01,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k",
-    "v",
     "q",
-    "o"
   ],
   "task_type": "SEQ_2_SEQ_LM",
   "trainable_token_indices": null,

   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 16,
   "lora_bias": false,
   "lora_dropout": 0.01,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k",
+    "o",
     "q",
+    "v"
   ],
   "task_type": "SEQ_2_SEQ_LM",
   "trainable_token_indices": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12a27d56f59a04832f4840e8b01ddd6595b09cca5325a98de2d0ef298871b966
-size 136240968

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f00392fb610a9986c3d1dc0c7e7ee97ad35b02de6dd769c894f7b9989c86e75
+size 133881456

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c865b5cd3e05e4dc94488ecb7529ff21a334dfd8d9de9d69bf5ca8df4768622
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:9562e99463250b2d7913296e340964def870f91269062fc07c209f0562acce62
 size 5432