End of training
Browse files
- README.md +22 -76
- added_tokens.json +5 -0
- config.json +5 -5
- generation_config.json +2 -1
- model.safetensors +2 -2
- runs/Dec01_13-22-37_christopher-System-Product-Name/events.out.tfevents.1701397358.christopher-System-Product-Name.719702.0 +3 -0
- runs/Dec01_13-23-31_christopher-System-Product-Name/events.out.tfevents.1701397412.christopher-System-Product-Name.720516.0 +3 -0
- runs/Dec01_13-24-20_christopher-System-Product-Name/events.out.tfevents.1701397461.christopher-System-Product-Name.721356.0 +3 -0
- runs/Dec01_13-25-12_christopher-System-Product-Name/events.out.tfevents.1701397513.christopher-System-Product-Name.722333.0 +3 -0
- runs/Dec01_13-26-53_christopher-System-Product-Name/events.out.tfevents.1701397614.christopher-System-Product-Name.724388.0 +3 -0
- runs/Dec01_13-31-52_christopher-System-Product-Name/events.out.tfevents.1701397913.christopher-System-Product-Name.730346.0 +3 -0
- runs/Dec01_13-32-46_christopher-System-Product-Name/events.out.tfevents.1701397967.christopher-System-Product-Name.731453.0 +3 -0
- runs/Dec01_13-38-13_christopher-System-Product-Name/events.out.tfevents.1701398294.christopher-System-Product-Name.737472.0 +3 -0
- runs/Dec01_13-42-50_christopher-System-Product-Name/events.out.tfevents.1701398571.christopher-System-Product-Name.742627.0 +3 -0
- runs/Nov29_11-48-56_christopher-System-Product-Name/events.out.tfevents.1701218937.christopher-System-Product-Name.111994.0 +3 -0
- runs/Nov29_11-49-25_christopher-System-Product-Name/events.out.tfevents.1701218966.christopher-System-Product-Name.112623.0 +3 -0
- tokenizer.json +27 -0
- tokenizer_config.json +24 -0
- training_args.bin +2 -2
README.md
CHANGED
@@ -1,6 +1,4 @@
|
|
1 |
---
|
2 |
-
license: apache-2.0
|
3 |
-
base_model: google/flan-t5-small
|
4 |
tags:
|
5 |
- generated_from_trainer
|
6 |
model-index:
|
@@ -13,9 +11,9 @@ should probably proofread and complete it, then remove this comment. -->
|
|
13 |
|
14 |
# medication-lists
|
15 |
|
16 |
-
This model
|
17 |
It achieves the following results on the evaluation set:
|
18 |
-
- Loss: 0.
|
19 |
|
20 |
## Model description
|
21 |
|
@@ -34,89 +32,37 @@ More information needed
|
|
34 |
### Training hyperparameters
|
35 |
|
36 |
The following hyperparameters were used during training:
|
37 |
-
- learning_rate: 0.
|
38 |
-
- train_batch_size:
|
39 |
-
- eval_batch_size:
|
40 |
- seed: 42
|
41 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
42 |
- lr_scheduler_type: linear
|
43 |
- lr_scheduler_warmup_ratio: 0.03
|
44 |
-
- num_epochs:
|
45 |
|
46 |
### Training results
|
47 |
|
48 |
| Training Loss | Epoch | Step | Validation Loss |
|
49 |
|:-------------:|:-----:|:----:|:---------------:|
|
50 |
-
|
|
51 |
-
| 0.
|
52 |
-
| 0.
|
53 |
-
| 0.
|
54 |
-
| 0.
|
55 |
-
| 0.
|
56 |
-
| 0.
|
57 |
-
| 0.
|
58 |
-
| 0.
|
59 |
-
| 0.
|
60 |
-
| 0.
|
61 |
-
| 0.
|
62 |
-
| 0.
|
63 |
-
| 0.1589 | 4.27 | 700 | 0.1320 |
|
64 |
-
| 0.1606 | 4.57 | 750 | 0.1230 |
|
65 |
-
| 0.1545 | 4.88 | 800 | 0.1255 |
|
66 |
-
| 0.1502 | 5.18 | 850 | 0.1247 |
|
67 |
-
| 0.1438 | 5.49 | 900 | 0.1251 |
|
68 |
-
| 0.1395 | 5.79 | 950 | 0.1222 |
|
69 |
-
| 0.1414 | 6.1 | 1000 | 0.1173 |
|
70 |
-
| 0.133 | 6.4 | 1050 | 0.1149 |
|
71 |
-
| 0.1338 | 6.71 | 1100 | 0.1124 |
|
72 |
-
| 0.1361 | 7.01 | 1150 | 0.1148 |
|
73 |
-
| 0.1269 | 7.32 | 1200 | 0.1137 |
|
74 |
-
| 0.123 | 7.62 | 1250 | 0.1145 |
|
75 |
-
| 0.1203 | 7.93 | 1300 | 0.1129 |
|
76 |
-
| 0.1194 | 8.23 | 1350 | 0.1081 |
|
77 |
-
| 0.1177 | 8.54 | 1400 | 0.1099 |
|
78 |
-
| 0.1173 | 8.84 | 1450 | 0.1109 |
|
79 |
-
| 0.113 | 9.15 | 1500 | 0.1107 |
|
80 |
-
| 0.1122 | 9.45 | 1550 | 0.1068 |
|
81 |
-
| 0.11 | 9.76 | 1600 | 0.1072 |
|
82 |
-
| 0.1078 | 10.06 | 1650 | 0.1086 |
|
83 |
-
| 0.101 | 10.37 | 1700 | 0.1088 |
|
84 |
-
| 0.1106 | 10.67 | 1750 | 0.1079 |
|
85 |
-
| 0.1094 | 10.98 | 1800 | 0.1109 |
|
86 |
-
| 0.1072 | 11.28 | 1850 | 0.1054 |
|
87 |
-
| 0.103 | 11.59 | 1900 | 0.1062 |
|
88 |
-
| 0.1009 | 11.89 | 1950 | 0.1051 |
|
89 |
-
| 0.1005 | 12.2 | 2000 | 0.1049 |
|
90 |
-
| 0.0985 | 12.5 | 2050 | 0.1059 |
|
91 |
-
| 0.0983 | 12.8 | 2100 | 0.1063 |
|
92 |
-
| 0.0953 | 13.11 | 2150 | 0.1062 |
|
93 |
-
| 0.0935 | 13.41 | 2200 | 0.1044 |
|
94 |
-
| 0.1003 | 13.72 | 2250 | 0.1034 |
|
95 |
-
| 0.0935 | 14.02 | 2300 | 0.1049 |
|
96 |
-
| 0.0935 | 14.33 | 2350 | 0.1038 |
|
97 |
-
| 0.096 | 14.63 | 2400 | 0.1020 |
|
98 |
-
| 0.0894 | 14.94 | 2450 | 0.1048 |
|
99 |
-
| 0.0931 | 15.24 | 2500 | 0.1034 |
|
100 |
-
| 0.0888 | 15.55 | 2550 | 0.1030 |
|
101 |
-
| 0.0904 | 15.85 | 2600 | 0.1038 |
|
102 |
-
| 0.0885 | 16.16 | 2650 | 0.1046 |
|
103 |
-
| 0.088 | 16.46 | 2700 | 0.1041 |
|
104 |
-
| 0.0925 | 16.77 | 2750 | 0.1027 |
|
105 |
-
| 0.0835 | 17.07 | 2800 | 0.1034 |
|
106 |
-
| 0.089 | 17.38 | 2850 | 0.1036 |
|
107 |
-
| 0.0844 | 17.68 | 2900 | 0.1043 |
|
108 |
-
| 0.0866 | 17.99 | 2950 | 0.1031 |
|
109 |
-
| 0.0835 | 18.29 | 3000 | 0.1030 |
|
110 |
-
| 0.0826 | 18.6 | 3050 | 0.1028 |
|
111 |
-
| 0.0874 | 18.9 | 3100 | 0.1018 |
|
112 |
-
| 0.0846 | 19.21 | 3150 | 0.1030 |
|
113 |
-
| 0.0852 | 19.51 | 3200 | 0.1026 |
|
114 |
-
| 0.0835 | 19.82 | 3250 | 0.1027 |
|
115 |
|
116 |
|
117 |
### Framework versions
|
118 |
|
119 |
-
- Transformers 4.35.
|
120 |
-
- Pytorch 2.1
|
121 |
-
- Datasets 2.
|
122 |
- Tokenizers 0.14.1
|
|
|
1 |
---
|
|
|
|
|
2 |
tags:
|
3 |
- generated_from_trainer
|
4 |
model-index:
|
|
|
11 |
|
12 |
# medication-lists
|
13 |
|
14 |
+
This model was trained from scratch on an unknown dataset.
|
15 |
It achieves the following results on the evaluation set:
|
16 |
+
- Loss: 0.0228
|
17 |
|
18 |
## Model description
|
19 |
|
|
|
32 |
### Training hyperparameters
|
33 |
|
34 |
The following hyperparameters were used during training:
|
35 |
+
- learning_rate: 0.004
|
36 |
+
- train_batch_size: 3
|
37 |
+
- eval_batch_size: 3
|
38 |
- seed: 42
|
39 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
40 |
- lr_scheduler_type: linear
|
41 |
- lr_scheduler_warmup_ratio: 0.03
|
42 |
+
- num_epochs: 2
|
43 |
|
44 |
### Training results
|
45 |
|
46 |
| Training Loss | Epoch | Step | Validation Loss |
|
47 |
|:-------------:|:-----:|:----:|:---------------:|
|
48 |
+
| 0.2309 | 0.15 | 400 | 0.1886 |
|
49 |
+
| 0.151 | 0.3 | 800 | 0.1260 |
|
50 |
+
| 0.1061 | 0.45 | 1200 | 0.0852 |
|
51 |
+
| 0.0773 | 0.6 | 1600 | 0.0610 |
|
52 |
+
| 0.0693 | 0.75 | 2000 | 0.0498 |
|
53 |
+
| 0.0505 | 0.9 | 2400 | 0.0428 |
|
54 |
+
| 0.0428 | 1.05 | 2800 | 0.0387 |
|
55 |
+
| 0.0343 | 1.2 | 3200 | 0.0324 |
|
56 |
+
| 0.0289 | 1.35 | 3600 | 0.0299 |
|
57 |
+
| 0.0281 | 1.5 | 4000 | 0.0265 |
|
58 |
+
| 0.0251 | 1.65 | 4400 | 0.0250 |
|
59 |
+
| 0.0208 | 1.8 | 4800 | 0.0236 |
|
60 |
+
| 0.021 | 1.95 | 5200 | 0.0228 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
|
63 |
### Framework versions
|
64 |
|
65 |
+
- Transformers 4.35.2
|
66 |
+
- Pytorch 2.0.1+cu117
|
67 |
+
- Datasets 2.14.7
|
68 |
- Tokenizers 0.14.1
|
added_tokens.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"\n": 32100,
|
3 |
+
"{": 32101,
|
4 |
+
"}": 32102
|
5 |
+
}
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"T5ForConditionalGeneration"
|
5 |
],
|
@@ -8,10 +8,10 @@
|
|
8 |
"d_kv": 64,
|
9 |
"d_model": 512,
|
10 |
"decoder_start_token_id": 0,
|
11 |
-
"dense_act_fn": "
|
12 |
"dropout_rate": 0.1,
|
13 |
"eos_token_id": 1,
|
14 |
-
"feed_forward_proj": "
|
15 |
"initializer_factor": 1.0,
|
16 |
"is_encoder_decoder": true,
|
17 |
"is_gated_act": true,
|
@@ -56,7 +56,7 @@
|
|
56 |
},
|
57 |
"tie_word_embeddings": false,
|
58 |
"torch_dtype": "float32",
|
59 |
-
"transformers_version": "4.35.
|
60 |
"use_cache": false,
|
61 |
-
"vocab_size":
|
62 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/mnt/hdd/pretrained_models/flan-t5-small",
|
3 |
"architectures": [
|
4 |
"T5ForConditionalGeneration"
|
5 |
],
|
|
|
8 |
"d_kv": 64,
|
9 |
"d_model": 512,
|
10 |
"decoder_start_token_id": 0,
|
11 |
+
"dense_act_fn": "gelu",
|
12 |
"dropout_rate": 0.1,
|
13 |
"eos_token_id": 1,
|
14 |
+
"feed_forward_proj": "gelu",
|
15 |
"initializer_factor": 1.0,
|
16 |
"is_encoder_decoder": true,
|
17 |
"is_gated_act": true,
|
|
|
56 |
},
|
57 |
"tie_word_embeddings": false,
|
58 |
"torch_dtype": "float32",
|
59 |
+
"transformers_version": "4.35.2",
|
60 |
"use_cache": false,
|
61 |
+
"vocab_size": 32103
|
62 |
}
|
generation_config.json
CHANGED
@@ -3,5 +3,6 @@
|
|
3 |
"decoder_start_token_id": 0,
|
4 |
"eos_token_id": 1,
|
5 |
"pad_token_id": 0,
|
6 |
-
"transformers_version": "4.35.
|
|
|
7 |
}
|
|
|
3 |
"decoder_start_token_id": 0,
|
4 |
"eos_token_id": 1,
|
5 |
"pad_token_id": 0,
|
6 |
+
"transformers_version": "4.35.2",
|
7 |
+
"use_cache": false
|
8 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa09322e2aac1456fc50b8dda2c3c76545f9e3cae9a38ce93d775ef0ad9061d9
|
3 |
+
size 307764648
|
runs/Dec01_13-22-37_christopher-System-Product-Name/events.out.tfevents.1701397358.christopher-System-Product-Name.719702.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4eb3a61d4a011aa3211abd36dce4ae173983034f7ef5b864d349ac56bcab732
|
3 |
+
size 5327
|
runs/Dec01_13-23-31_christopher-System-Product-Name/events.out.tfevents.1701397412.christopher-System-Product-Name.720516.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48e6e90b112c989ec4ebf409cc423bb5ee85dce9a7d18e859079ad09dc8d71b5
|
3 |
+
size 5635
|
runs/Dec01_13-24-20_christopher-System-Product-Name/events.out.tfevents.1701397461.christopher-System-Product-Name.721356.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4517dff2d85b966e58f642a093ba393acd29f7a0e17a44ea9ed342d23b15dd80
|
3 |
+
size 4184
|
runs/Dec01_13-25-12_christopher-System-Product-Name/events.out.tfevents.1701397513.christopher-System-Product-Name.722333.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20d3456920e0463052e382f1ccecc2203f97c4babad2198e65440265559aecfe
|
3 |
+
size 5902
|
runs/Dec01_13-26-53_christopher-System-Product-Name/events.out.tfevents.1701397614.christopher-System-Product-Name.724388.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:843bc897063aae6517cefd3eb1e6a5ab28555f6c39b8b6be568396a8fa09d997
|
3 |
+
size 7750
|
runs/Dec01_13-31-52_christopher-System-Product-Name/events.out.tfevents.1701397913.christopher-System-Product-Name.730346.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d12ed56d124c4a6f97e19dba2601af66fe6135ddea3792139f19ae2ceb7a532f
|
3 |
+
size 5485
|
runs/Dec01_13-32-46_christopher-System-Product-Name/events.out.tfevents.1701397967.christopher-System-Product-Name.731453.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b47d95b66ee017e4182ce0d4929968376fa3fa5af135e25837b7708495667ac
|
3 |
+
size 7949
|
runs/Dec01_13-38-13_christopher-System-Product-Name/events.out.tfevents.1701398294.christopher-System-Product-Name.737472.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f78adfa98198c727c4914bb587452d70ae72580fad84b9d169abf2aed04cff5
|
3 |
+
size 7478
|
runs/Dec01_13-42-50_christopher-System-Product-Name/events.out.tfevents.1701398571.christopher-System-Product-Name.742627.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8024b0f9cac171b602252fd7b38e1fea427adca9aa2c08eea3041a513e73d3f3
|
3 |
+
size 25842
|
runs/Nov29_11-48-56_christopher-System-Product-Name/events.out.tfevents.1701218937.christopher-System-Product-Name.111994.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0a2e632428c42d837dea5ec37a18c1df1de12b4750e1aca6ff068343eab2d63
|
3 |
+
size 5327
|
runs/Nov29_11-49-25_christopher-System-Product-Name/events.out.tfevents.1701218966.christopher-System-Product-Name.112623.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57ee57a7670456a4a0c8dbe3ded1dbeff2436a934ec62766c1db64a65be14f44
|
3 |
+
size 13382
|
tokenizer.json
CHANGED
@@ -929,6 +929,33 @@
|
|
929 |
"rstrip": false,
|
930 |
"normalized": false,
|
931 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
932 |
}
|
933 |
],
|
934 |
"normalizer": {
|
|
|
929 |
"rstrip": false,
|
930 |
"normalized": false,
|
931 |
"special": true
|
932 |
+
},
|
933 |
+
{
|
934 |
+
"id": 32100,
|
935 |
+
"content": "\n",
|
936 |
+
"single_word": false,
|
937 |
+
"lstrip": false,
|
938 |
+
"rstrip": false,
|
939 |
+
"normalized": true,
|
940 |
+
"special": false
|
941 |
+
},
|
942 |
+
{
|
943 |
+
"id": 32101,
|
944 |
+
"content": "{",
|
945 |
+
"single_word": false,
|
946 |
+
"lstrip": false,
|
947 |
+
"rstrip": false,
|
948 |
+
"normalized": true,
|
949 |
+
"special": false
|
950 |
+
},
|
951 |
+
{
|
952 |
+
"id": 32102,
|
953 |
+
"content": "}",
|
954 |
+
"single_word": false,
|
955 |
+
"lstrip": false,
|
956 |
+
"rstrip": false,
|
957 |
+
"normalized": true,
|
958 |
+
"special": false
|
959 |
}
|
960 |
],
|
961 |
"normalizer": {
|
tokenizer_config.json
CHANGED
@@ -823,6 +823,30 @@
|
|
823 |
"rstrip": false,
|
824 |
"single_word": false,
|
825 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
826 |
}
|
827 |
},
|
828 |
"additional_special_tokens": [
|
|
|
823 |
"rstrip": false,
|
824 |
"single_word": false,
|
825 |
"special": true
|
826 |
+
},
|
827 |
+
"32100": {
|
828 |
+
"content": "\n",
|
829 |
+
"lstrip": false,
|
830 |
+
"normalized": true,
|
831 |
+
"rstrip": false,
|
832 |
+
"single_word": false,
|
833 |
+
"special": false
|
834 |
+
},
|
835 |
+
"32101": {
|
836 |
+
"content": "{",
|
837 |
+
"lstrip": false,
|
838 |
+
"normalized": true,
|
839 |
+
"rstrip": false,
|
840 |
+
"single_word": false,
|
841 |
+
"special": false
|
842 |
+
},
|
843 |
+
"32102": {
|
844 |
+
"content": "}",
|
845 |
+
"lstrip": false,
|
846 |
+
"normalized": true,
|
847 |
+
"rstrip": false,
|
848 |
+
"single_word": false,
|
849 |
+
"special": false
|
850 |
}
|
851 |
},
|
852 |
"additional_special_tokens": [
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b37bd029f8e6712134df6d32e0ac86d8a68ab00fd8b8b33747c29c726abc98c
|
3 |
+
size 4347
|