rrodrigu3z committed on
Commit
5fc5287
·
1 Parent(s): 6782ab5

End of training

Browse files
README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 1.1776
19
 
20
  ## Model description
21
 
 
15
 
16
  This model is a fine-tuned version of [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 1.1777
19
 
20
  ## Model description
21
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 1.1776556968688965,
4
+ "eval_runtime": 4.423,
5
+ "eval_samples": 100,
6
+ "eval_samples_per_second": 22.609,
7
+ "eval_steps_per_second": 5.652,
8
+ "train_loss": 1.2418343523557192,
9
+ "train_runtime": 36070.0373,
10
+ "train_samples": 87560,
11
+ "train_samples_per_second": 7.282,
12
+ "train_steps_per_second": 0.455
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_loss": 1.1776556968688965,
4
+ "eval_runtime": 4.423,
5
+ "eval_samples": 100,
6
+ "eval_samples_per_second": 22.609,
7
+ "eval_steps_per_second": 5.652
8
+ }
runs/Dec17_20-54-28_psn3ticfngvn/events.out.tfevents.1702882704.psn3ticfngvn.101611.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63d51e93ab2b19094ecfb6aadff953c5857cab26fdfd5de6005b84fd7515e5de
3
+ size 316
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 1.2418343523557192,
4
+ "train_runtime": 36070.0373,
5
+ "train_samples": 87560,
6
+ "train_samples_per_second": 7.282,
7
+ "train_steps_per_second": 0.455
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.999725902238465,
5
+ "eval_steps": 2000,
6
+ "global_step": 16416,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.09,
13
+ "learning_rate": 4.8477095516569206e-05,
14
+ "loss": 1.4175,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.18,
19
+ "learning_rate": 4.69541910331384e-05,
20
+ "loss": 1.3652,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.27,
25
+ "learning_rate": 4.54312865497076e-05,
26
+ "loss": 1.3497,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 0.37,
31
+ "learning_rate": 4.39083820662768e-05,
32
+ "loss": 1.3443,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 0.37,
37
+ "eval_loss": 1.2274980545043945,
38
+ "eval_runtime": 4.5329,
39
+ "eval_samples_per_second": 22.061,
40
+ "eval_steps_per_second": 5.515,
41
+ "step": 2000
42
+ },
43
+ {
44
+ "epoch": 0.46,
45
+ "learning_rate": 4.238547758284601e-05,
46
+ "loss": 1.3359,
47
+ "step": 2500
48
+ },
49
+ {
50
+ "epoch": 0.55,
51
+ "learning_rate": 4.086257309941521e-05,
52
+ "loss": 1.3317,
53
+ "step": 3000
54
+ },
55
+ {
56
+ "epoch": 0.64,
57
+ "learning_rate": 3.933966861598441e-05,
58
+ "loss": 1.322,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.73,
63
+ "learning_rate": 3.7816764132553604e-05,
64
+ "loss": 1.3148,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 0.73,
69
+ "eval_loss": 1.2038463354110718,
70
+ "eval_runtime": 4.5544,
71
+ "eval_samples_per_second": 21.957,
72
+ "eval_steps_per_second": 5.489,
73
+ "step": 4000
74
+ },
75
+ {
76
+ "epoch": 0.82,
77
+ "learning_rate": 3.629385964912281e-05,
78
+ "loss": 1.3261,
79
+ "step": 4500
80
+ },
81
+ {
82
+ "epoch": 0.91,
83
+ "learning_rate": 3.477095516569201e-05,
84
+ "loss": 1.3198,
85
+ "step": 5000
86
+ },
87
+ {
88
+ "epoch": 1.01,
89
+ "learning_rate": 3.3248050682261215e-05,
90
+ "loss": 1.3166,
91
+ "step": 5500
92
+ },
93
+ {
94
+ "epoch": 1.1,
95
+ "learning_rate": 3.172514619883041e-05,
96
+ "loss": 1.2259,
97
+ "step": 6000
98
+ },
99
+ {
100
+ "epoch": 1.1,
101
+ "eval_loss": 1.1959730386734009,
102
+ "eval_runtime": 4.5591,
103
+ "eval_samples_per_second": 21.934,
104
+ "eval_steps_per_second": 5.484,
105
+ "step": 6000
106
+ },
107
+ {
108
+ "epoch": 1.19,
109
+ "learning_rate": 3.0202241715399608e-05,
110
+ "loss": 1.2181,
111
+ "step": 6500
112
+ },
113
+ {
114
+ "epoch": 1.28,
115
+ "learning_rate": 2.8679337231968812e-05,
116
+ "loss": 1.2247,
117
+ "step": 7000
118
+ },
119
+ {
120
+ "epoch": 1.37,
121
+ "learning_rate": 2.7156432748538012e-05,
122
+ "loss": 1.2228,
123
+ "step": 7500
124
+ },
125
+ {
126
+ "epoch": 1.46,
127
+ "learning_rate": 2.5633528265107216e-05,
128
+ "loss": 1.2271,
129
+ "step": 8000
130
+ },
131
+ {
132
+ "epoch": 1.46,
133
+ "eval_loss": 1.1902621984481812,
134
+ "eval_runtime": 4.5576,
135
+ "eval_samples_per_second": 21.941,
136
+ "eval_steps_per_second": 5.485,
137
+ "step": 8000
138
+ },
139
+ {
140
+ "epoch": 1.55,
141
+ "learning_rate": 2.4110623781676416e-05,
142
+ "loss": 1.2324,
143
+ "step": 8500
144
+ },
145
+ {
146
+ "epoch": 1.64,
147
+ "learning_rate": 2.2587719298245616e-05,
148
+ "loss": 1.219,
149
+ "step": 9000
150
+ },
151
+ {
152
+ "epoch": 1.74,
153
+ "learning_rate": 2.1064814814814816e-05,
154
+ "loss": 1.2294,
155
+ "step": 9500
156
+ },
157
+ {
158
+ "epoch": 1.83,
159
+ "learning_rate": 1.9541910331384016e-05,
160
+ "loss": 1.2224,
161
+ "step": 10000
162
+ },
163
+ {
164
+ "epoch": 1.83,
165
+ "eval_loss": 1.1794958114624023,
166
+ "eval_runtime": 4.5594,
167
+ "eval_samples_per_second": 21.933,
168
+ "eval_steps_per_second": 5.483,
169
+ "step": 10000
170
+ },
171
+ {
172
+ "epoch": 1.92,
173
+ "learning_rate": 1.8019005847953216e-05,
174
+ "loss": 1.2206,
175
+ "step": 10500
176
+ },
177
+ {
178
+ "epoch": 2.01,
179
+ "learning_rate": 1.6496101364522417e-05,
180
+ "loss": 1.2081,
181
+ "step": 11000
182
+ },
183
+ {
184
+ "epoch": 2.1,
185
+ "learning_rate": 1.497319688109162e-05,
186
+ "loss": 1.165,
187
+ "step": 11500
188
+ },
189
+ {
190
+ "epoch": 2.19,
191
+ "learning_rate": 1.3450292397660819e-05,
192
+ "loss": 1.166,
193
+ "step": 12000
194
+ },
195
+ {
196
+ "epoch": 2.19,
197
+ "eval_loss": 1.1834660768508911,
198
+ "eval_runtime": 4.5566,
199
+ "eval_samples_per_second": 21.946,
200
+ "eval_steps_per_second": 5.487,
201
+ "step": 12000
202
+ },
203
+ {
204
+ "epoch": 2.28,
205
+ "learning_rate": 1.192738791423002e-05,
206
+ "loss": 1.1471,
207
+ "step": 12500
208
+ },
209
+ {
210
+ "epoch": 2.38,
211
+ "learning_rate": 1.040448343079922e-05,
212
+ "loss": 1.1654,
213
+ "step": 13000
214
+ },
215
+ {
216
+ "epoch": 2.47,
217
+ "learning_rate": 8.881578947368421e-06,
218
+ "loss": 1.1517,
219
+ "step": 13500
220
+ },
221
+ {
222
+ "epoch": 2.56,
223
+ "learning_rate": 7.358674463937622e-06,
224
+ "loss": 1.1599,
225
+ "step": 14000
226
+ },
227
+ {
228
+ "epoch": 2.56,
229
+ "eval_loss": 1.1791805028915405,
230
+ "eval_runtime": 4.5397,
231
+ "eval_samples_per_second": 22.028,
232
+ "eval_steps_per_second": 5.507,
233
+ "step": 14000
234
+ },
235
+ {
236
+ "epoch": 2.65,
237
+ "learning_rate": 5.835769980506823e-06,
238
+ "loss": 1.1621,
239
+ "step": 14500
240
+ },
241
+ {
242
+ "epoch": 2.74,
243
+ "learning_rate": 4.312865497076023e-06,
244
+ "loss": 1.1725,
245
+ "step": 15000
246
+ },
247
+ {
248
+ "epoch": 2.83,
249
+ "learning_rate": 2.7899610136452243e-06,
250
+ "loss": 1.1607,
251
+ "step": 15500
252
+ },
253
+ {
254
+ "epoch": 2.92,
255
+ "learning_rate": 1.2670565302144249e-06,
256
+ "loss": 1.1644,
257
+ "step": 16000
258
+ },
259
+ {
260
+ "epoch": 2.92,
261
+ "eval_loss": 1.1775761842727661,
262
+ "eval_runtime": 4.5403,
263
+ "eval_samples_per_second": 22.025,
264
+ "eval_steps_per_second": 5.506,
265
+ "step": 16000
266
+ },
267
+ {
268
+ "epoch": 3.0,
269
+ "step": 16416,
270
+ "total_flos": 6.325107751849329e+17,
271
+ "train_loss": 1.2418343523557192,
272
+ "train_runtime": 36070.0373,
273
+ "train_samples_per_second": 7.282,
274
+ "train_steps_per_second": 0.455
275
+ }
276
+ ],
277
+ "logging_steps": 500,
278
+ "max_steps": 16416,
279
+ "num_train_epochs": 3,
280
+ "save_steps": 500,
281
+ "total_flos": 6.325107751849329e+17,
282
+ "trial_name": null,
283
+ "trial_params": null
284
+ }