VOKulus committed on
Commit
50f5dc7
·
verified ·
1 Parent(s): 97d3ecb

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ library_name: transformers
4
+ tags:
5
+ - autotrain
6
+ - question-answering
7
+ base_model: deepset/roberta-base-squad2
8
+ widget:
9
+ - text: "Who loves AutoTrain?"
10
+ context: "Everyone loves AutoTrain"
11
+ datasets:
12
+ - VOKulus/test
13
+ ---
14
+
15
+ # Model Trained Using AutoTrain
16
+
17
+ - Problem type: Extractive Question Answering
18
+
19
+ ## Validation Metrics
20
+
21
+ loss: 6.235438195290044e-05
22
+
23
+ exact_match: 99.7703
24
+
25
+ f1: 99.8851
26
+
27
+ runtime: 18.3183
28
+
29
+ samples_per_second: 77.627
30
+
31
+ steps_per_second: 9.717
32
+
33
+ epoch: 2.0
34
+
35
+ ## Usage
36
+
37
+
38
+ ```python
39
+ import torch
40
+
41
+ from transformers import AutoModelForQuestionAnswering, AutoTokenizer
42
+
43
+ model = AutoModelForQuestionAnswering.from_pretrained(...)
44
+
45
+ tokenizer = AutoTokenizer.from_pretrained(...)
46
+
47
+ # model and tokenizer are the RoBERTa QA classes loaded via the Auto classes above
48
+
49
+ question, text = "Who loves AutoTrain?", "Everyone loves AutoTrain"
50
+
51
+ inputs = tokenizer(question, text, return_tensors='pt')
52
+
53
+ start_positions = torch.tensor([1])
54
+
55
+ end_positions = torch.tensor([3])
56
+
57
+ outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions)
58
+
59
+ loss = outputs.loss
60
+
61
+ start_scores = outputs.start_logits
62
+
63
+ end_scores = outputs.end_logits
64
+ ```
checkpoint-2858/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "deepset/roberta-base-squad2",
3
+ "architectures": [
4
+ "RobertaForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "language": "english",
17
+ "layer_norm_eps": 1e-05,
18
+ "max_position_embeddings": 514,
19
+ "model_type": "roberta",
20
+ "name": "Roberta",
21
+ "num_attention_heads": 12,
22
+ "num_hidden_layers": 12,
23
+ "pad_token_id": 1,
24
+ "position_embedding_type": "absolute",
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.48.0",
27
+ "type_vocab_size": 1,
28
+ "use_cache": true,
29
+ "vocab_size": 50265
30
+ }
checkpoint-2858/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d9d1bde8b7624d84887bf84e5395e3cc1556658d2a7677a1b32e7734e09fa24
3
+ size 496250232
checkpoint-2858/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bb819b0991628d959e4c7393f6085c36476a7f8645c2bbe0ad2d10ef177fc9f
3
+ size 992619066
checkpoint-2858/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b68cf39d51ae0fda37757295ed75e9048e8a51b6fcb64a1285662054773cb22
3
+ size 14244
checkpoint-2858/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c2282cfd37433b089629f81059c258eb782f84a53354c61b9d9dbe616f7d530
3
+ size 1064
checkpoint-2858/trainer_state.json ADDED
@@ -0,0 +1,860 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 6.235438195290044e-05,
3
+ "best_model_checkpoint": "my-model-test-roberta/checkpoint-2858",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2858,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01749475157452764,
13
+ "grad_norm": 130.36558532714844,
14
+ "learning_rate": 1.3986013986013987e-06,
15
+ "loss": 5.7756,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.03498950314905528,
20
+ "grad_norm": 65.01083374023438,
21
+ "learning_rate": 3.0769230769230774e-06,
22
+ "loss": 3.2408,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.052484254723582924,
27
+ "grad_norm": 54.41019058227539,
28
+ "learning_rate": 4.8251748251748255e-06,
29
+ "loss": 1.8933,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.06997900629811056,
34
+ "grad_norm": 66.41553497314453,
35
+ "learning_rate": 6.573426573426574e-06,
36
+ "loss": 1.2565,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.08747375787263821,
41
+ "grad_norm": 32.627281188964844,
42
+ "learning_rate": 8.321678321678323e-06,
43
+ "loss": 0.7743,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.10496850944716585,
48
+ "grad_norm": 46.808109283447266,
49
+ "learning_rate": 1.0069930069930071e-05,
50
+ "loss": 0.4214,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.1224632610216935,
55
+ "grad_norm": 77.44200897216797,
56
+ "learning_rate": 1.181818181818182e-05,
57
+ "loss": 0.4034,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.13995801259622112,
62
+ "grad_norm": 52.945068359375,
63
+ "learning_rate": 1.3566433566433568e-05,
64
+ "loss": 0.2332,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.15745276417074877,
69
+ "grad_norm": 0.948405921459198,
70
+ "learning_rate": 1.5314685314685317e-05,
71
+ "loss": 0.1798,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.17494751574527642,
76
+ "grad_norm": 0.12931326031684875,
77
+ "learning_rate": 1.7062937062937065e-05,
78
+ "loss": 0.0596,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.19244226731980407,
83
+ "grad_norm": 0.5479409098625183,
84
+ "learning_rate": 1.881118881118881e-05,
85
+ "loss": 0.0956,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.2099370188943317,
90
+ "grad_norm": 163.63729858398438,
91
+ "learning_rate": 1.9937791601866253e-05,
92
+ "loss": 0.2539,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.22743177046885935,
97
+ "grad_norm": 0.04818764701485634,
98
+ "learning_rate": 1.974339035769829e-05,
99
+ "loss": 0.116,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.244926522043387,
104
+ "grad_norm": 0.11932364106178284,
105
+ "learning_rate": 1.954898911353033e-05,
106
+ "loss": 0.0437,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.2624212736179146,
111
+ "grad_norm": 0.0045976778492331505,
112
+ "learning_rate": 1.9354587869362366e-05,
113
+ "loss": 0.0842,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.27991602519244224,
118
+ "grad_norm": 0.051815927028656006,
119
+ "learning_rate": 1.9160186625194403e-05,
120
+ "loss": 0.0512,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.2974107767669699,
125
+ "grad_norm": 0.02530599944293499,
126
+ "learning_rate": 1.896578538102644e-05,
127
+ "loss": 0.0019,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.31490552834149754,
132
+ "grad_norm": 0.14514470100402832,
133
+ "learning_rate": 1.877138413685848e-05,
134
+ "loss": 0.0056,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.33240027991602517,
139
+ "grad_norm": 303.62939453125,
140
+ "learning_rate": 1.8576982892690513e-05,
141
+ "loss": 0.1492,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.34989503149055284,
146
+ "grad_norm": 0.003696146886795759,
147
+ "learning_rate": 1.8382581648522554e-05,
148
+ "loss": 0.0045,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.36738978306508047,
153
+ "grad_norm": 20.425518035888672,
154
+ "learning_rate": 1.818818040435459e-05,
155
+ "loss": 0.0463,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.38488453463960814,
160
+ "grad_norm": 0.09205462783575058,
161
+ "learning_rate": 1.7993779160186625e-05,
162
+ "loss": 0.0017,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.40237928621413577,
167
+ "grad_norm": 0.038981515914201736,
168
+ "learning_rate": 1.7799377916018663e-05,
169
+ "loss": 0.0606,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.4198740377886634,
174
+ "grad_norm": 0.1848757266998291,
175
+ "learning_rate": 1.76049766718507e-05,
176
+ "loss": 0.0559,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.43736878936319107,
181
+ "grad_norm": 0.0069680167362093925,
182
+ "learning_rate": 1.7410575427682738e-05,
183
+ "loss": 0.0523,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.4548635409377187,
188
+ "grad_norm": 0.011184507980942726,
189
+ "learning_rate": 1.7216174183514775e-05,
190
+ "loss": 0.0003,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.4723582925122463,
195
+ "grad_norm": 0.011598587967455387,
196
+ "learning_rate": 1.7021772939346813e-05,
197
+ "loss": 0.0901,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.489853044086774,
202
+ "grad_norm": 0.7256177067756653,
203
+ "learning_rate": 1.682737169517885e-05,
204
+ "loss": 0.0006,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.5073477956613016,
209
+ "grad_norm": 0.002232016297057271,
210
+ "learning_rate": 1.6632970451010888e-05,
211
+ "loss": 0.0017,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.5248425472358292,
216
+ "grad_norm": 0.0009716423810459673,
217
+ "learning_rate": 1.6438569206842926e-05,
218
+ "loss": 0.0654,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.5423372988103569,
223
+ "grad_norm": 2.8868448734283447,
224
+ "learning_rate": 1.6244167962674963e-05,
225
+ "loss": 0.0003,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 0.5598320503848845,
230
+ "grad_norm": 0.002249341458082199,
231
+ "learning_rate": 1.6049766718507e-05,
232
+ "loss": 0.0002,
233
+ "step": 800
234
+ },
235
+ {
236
+ "epoch": 0.5773268019594122,
237
+ "grad_norm": 0.0035393834114074707,
238
+ "learning_rate": 1.5855365474339038e-05,
239
+ "loss": 0.0006,
240
+ "step": 825
241
+ },
242
+ {
243
+ "epoch": 0.5948215535339398,
244
+ "grad_norm": 0.007113989442586899,
245
+ "learning_rate": 1.5660964230171072e-05,
246
+ "loss": 0.0002,
247
+ "step": 850
248
+ },
249
+ {
250
+ "epoch": 0.6123163051084675,
251
+ "grad_norm": 0.813864529132843,
252
+ "learning_rate": 1.546656298600311e-05,
253
+ "loss": 0.0004,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 0.6298110566829951,
258
+ "grad_norm": 0.04794127866625786,
259
+ "learning_rate": 1.527216174183515e-05,
260
+ "loss": 0.0003,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 0.6473058082575227,
265
+ "grad_norm": 0.0020588026382029057,
266
+ "learning_rate": 1.5077760497667187e-05,
267
+ "loss": 0.0333,
268
+ "step": 925
269
+ },
270
+ {
271
+ "epoch": 0.6648005598320503,
272
+ "grad_norm": 4.8790507316589355,
273
+ "learning_rate": 1.4883359253499223e-05,
274
+ "loss": 0.0038,
275
+ "step": 950
276
+ },
277
+ {
278
+ "epoch": 0.6822953114065781,
279
+ "grad_norm": 0.5302098989486694,
280
+ "learning_rate": 1.468895800933126e-05,
281
+ "loss": 0.0062,
282
+ "step": 975
283
+ },
284
+ {
285
+ "epoch": 0.6997900629811057,
286
+ "grad_norm": 0.0019412849796935916,
287
+ "learning_rate": 1.44945567651633e-05,
288
+ "loss": 0.0001,
289
+ "step": 1000
290
+ },
291
+ {
292
+ "epoch": 0.7172848145556333,
293
+ "grad_norm": 0.00042624305933713913,
294
+ "learning_rate": 1.4300155520995335e-05,
295
+ "loss": 0.0002,
296
+ "step": 1025
297
+ },
298
+ {
299
+ "epoch": 0.7347795661301609,
300
+ "grad_norm": 0.002252366626635194,
301
+ "learning_rate": 1.4105754276827373e-05,
302
+ "loss": 0.0,
303
+ "step": 1050
304
+ },
305
+ {
306
+ "epoch": 0.7522743177046886,
307
+ "grad_norm": 0.0027475322131067514,
308
+ "learning_rate": 1.3911353032659409e-05,
309
+ "loss": 0.0566,
310
+ "step": 1075
311
+ },
312
+ {
313
+ "epoch": 0.7697690692792163,
314
+ "grad_norm": 0.009604093618690968,
315
+ "learning_rate": 1.3716951788491448e-05,
316
+ "loss": 0.002,
317
+ "step": 1100
318
+ },
319
+ {
320
+ "epoch": 0.7872638208537439,
321
+ "grad_norm": 0.0056050559505820274,
322
+ "learning_rate": 1.3522550544323485e-05,
323
+ "loss": 0.0131,
324
+ "step": 1125
325
+ },
326
+ {
327
+ "epoch": 0.8047585724282715,
328
+ "grad_norm": 0.0009983275085687637,
329
+ "learning_rate": 1.3328149300155521e-05,
330
+ "loss": 0.0055,
331
+ "step": 1150
332
+ },
333
+ {
334
+ "epoch": 0.8222533240027992,
335
+ "grad_norm": 0.000412652239901945,
336
+ "learning_rate": 1.3133748055987559e-05,
337
+ "loss": 0.0352,
338
+ "step": 1175
339
+ },
340
+ {
341
+ "epoch": 0.8397480755773268,
342
+ "grad_norm": 0.002874561119824648,
343
+ "learning_rate": 1.2939346811819598e-05,
344
+ "loss": 0.0006,
345
+ "step": 1200
346
+ },
347
+ {
348
+ "epoch": 0.8572428271518544,
349
+ "grad_norm": 0.01263987272977829,
350
+ "learning_rate": 1.2744945567651634e-05,
351
+ "loss": 0.0378,
352
+ "step": 1225
353
+ },
354
+ {
355
+ "epoch": 0.8747375787263821,
356
+ "grad_norm": 0.007837435230612755,
357
+ "learning_rate": 1.2550544323483671e-05,
358
+ "loss": 0.0001,
359
+ "step": 1250
360
+ },
361
+ {
362
+ "epoch": 0.8922323303009098,
363
+ "grad_norm": 0.0008695307769812644,
364
+ "learning_rate": 1.2356143079315707e-05,
365
+ "loss": 0.0011,
366
+ "step": 1275
367
+ },
368
+ {
369
+ "epoch": 0.9097270818754374,
370
+ "grad_norm": 0.0004545428091660142,
371
+ "learning_rate": 1.2161741835147746e-05,
372
+ "loss": 0.0001,
373
+ "step": 1300
374
+ },
375
+ {
376
+ "epoch": 0.927221833449965,
377
+ "grad_norm": 0.014842044562101364,
378
+ "learning_rate": 1.1967340590979784e-05,
379
+ "loss": 0.0,
380
+ "step": 1325
381
+ },
382
+ {
383
+ "epoch": 0.9447165850244926,
384
+ "grad_norm": 0.008039949461817741,
385
+ "learning_rate": 1.177293934681182e-05,
386
+ "loss": 0.0017,
387
+ "step": 1350
388
+ },
389
+ {
390
+ "epoch": 0.9622113365990203,
391
+ "grad_norm": 0.0005223533953540027,
392
+ "learning_rate": 1.1578538102643857e-05,
393
+ "loss": 0.0004,
394
+ "step": 1375
395
+ },
396
+ {
397
+ "epoch": 0.979706088173548,
398
+ "grad_norm": 0.0010761632584035397,
399
+ "learning_rate": 1.1384136858475897e-05,
400
+ "loss": 0.0001,
401
+ "step": 1400
402
+ },
403
+ {
404
+ "epoch": 0.9972008397480756,
405
+ "grad_norm": 0.0003285344282630831,
406
+ "learning_rate": 1.1189735614307932e-05,
407
+ "loss": 0.0,
408
+ "step": 1425
409
+ },
410
+ {
411
+ "epoch": 1.0,
412
+ "eval_exact_match": 100.0,
413
+ "eval_f1": 100.0,
414
+ "eval_loss": 0.005018405616283417,
415
+ "eval_runtime": 17.7026,
416
+ "eval_samples_per_second": 80.327,
417
+ "eval_steps_per_second": 10.055,
418
+ "step": 1429
419
+ },
420
+ {
421
+ "epoch": 1.0146955913226032,
422
+ "grad_norm": 0.0008568214834667742,
423
+ "learning_rate": 1.099533437013997e-05,
424
+ "loss": 0.0,
425
+ "step": 1450
426
+ },
427
+ {
428
+ "epoch": 1.0321903428971309,
429
+ "grad_norm": 0.0027714003808796406,
430
+ "learning_rate": 1.0800933125972006e-05,
431
+ "loss": 0.0,
432
+ "step": 1475
433
+ },
434
+ {
435
+ "epoch": 1.0496850944716585,
436
+ "grad_norm": 0.0010522945085540414,
437
+ "learning_rate": 1.0606531881804045e-05,
438
+ "loss": 0.0,
439
+ "step": 1500
440
+ },
441
+ {
442
+ "epoch": 1.067179846046186,
443
+ "grad_norm": 0.0003821647842414677,
444
+ "learning_rate": 1.0412130637636083e-05,
445
+ "loss": 0.0,
446
+ "step": 1525
447
+ },
448
+ {
449
+ "epoch": 1.0846745976207137,
450
+ "grad_norm": 0.0006792128551751375,
451
+ "learning_rate": 1.0217729393468118e-05,
452
+ "loss": 0.0,
453
+ "step": 1550
454
+ },
455
+ {
456
+ "epoch": 1.1021693491952413,
457
+ "grad_norm": 0.0005681074107997119,
458
+ "learning_rate": 1.0023328149300156e-05,
459
+ "loss": 0.0,
460
+ "step": 1575
461
+ },
462
+ {
463
+ "epoch": 1.119664100769769,
464
+ "grad_norm": 0.001575466594658792,
465
+ "learning_rate": 9.828926905132194e-06,
466
+ "loss": 0.02,
467
+ "step": 1600
468
+ },
469
+ {
470
+ "epoch": 1.1371588523442968,
471
+ "grad_norm": 0.0008907430456019938,
472
+ "learning_rate": 9.634525660964231e-06,
473
+ "loss": 0.0015,
474
+ "step": 1625
475
+ },
476
+ {
477
+ "epoch": 1.1546536039188244,
478
+ "grad_norm": 0.3030645549297333,
479
+ "learning_rate": 9.440124416796269e-06,
480
+ "loss": 0.0,
481
+ "step": 1650
482
+ },
483
+ {
484
+ "epoch": 1.172148355493352,
485
+ "grad_norm": 0.000742213916964829,
486
+ "learning_rate": 9.245723172628306e-06,
487
+ "loss": 0.0617,
488
+ "step": 1675
489
+ },
490
+ {
491
+ "epoch": 1.1896431070678797,
492
+ "grad_norm": 0.0004069434362463653,
493
+ "learning_rate": 9.051321928460342e-06,
494
+ "loss": 0.0001,
495
+ "step": 1700
496
+ },
497
+ {
498
+ "epoch": 1.2071378586424073,
499
+ "grad_norm": 0.0017081464175134897,
500
+ "learning_rate": 8.856920684292381e-06,
501
+ "loss": 0.0,
502
+ "step": 1725
503
+ },
504
+ {
505
+ "epoch": 1.224632610216935,
506
+ "grad_norm": 0.0006449994398280978,
507
+ "learning_rate": 8.662519440124417e-06,
508
+ "loss": 0.0,
509
+ "step": 1750
510
+ },
511
+ {
512
+ "epoch": 1.2421273617914625,
513
+ "grad_norm": 1.1132986545562744,
514
+ "learning_rate": 8.468118195956455e-06,
515
+ "loss": 0.0151,
516
+ "step": 1775
517
+ },
518
+ {
519
+ "epoch": 1.2596221133659902,
520
+ "grad_norm": 0.0004918717895634472,
521
+ "learning_rate": 8.273716951788492e-06,
522
+ "loss": 0.0012,
523
+ "step": 1800
524
+ },
525
+ {
526
+ "epoch": 1.2771168649405178,
527
+ "grad_norm": 0.0007090566796250641,
528
+ "learning_rate": 8.07931570762053e-06,
529
+ "loss": 0.0136,
530
+ "step": 1825
531
+ },
532
+ {
533
+ "epoch": 1.2946116165150454,
534
+ "grad_norm": 0.020147522911429405,
535
+ "learning_rate": 7.884914463452567e-06,
536
+ "loss": 0.0002,
537
+ "step": 1850
538
+ },
539
+ {
540
+ "epoch": 1.312106368089573,
541
+ "grad_norm": 0.0021832261700183153,
542
+ "learning_rate": 7.690513219284605e-06,
543
+ "loss": 0.0001,
544
+ "step": 1875
545
+ },
546
+ {
547
+ "epoch": 1.3296011196641007,
548
+ "grad_norm": 0.028366833925247192,
549
+ "learning_rate": 7.496111975116641e-06,
550
+ "loss": 0.0,
551
+ "step": 1900
552
+ },
553
+ {
554
+ "epoch": 1.3470958712386283,
555
+ "grad_norm": 0.0010503004305064678,
556
+ "learning_rate": 7.301710730948679e-06,
557
+ "loss": 0.0007,
558
+ "step": 1925
559
+ },
560
+ {
561
+ "epoch": 1.3645906228131561,
562
+ "grad_norm": 0.008805891498923302,
563
+ "learning_rate": 7.107309486780716e-06,
564
+ "loss": 0.033,
565
+ "step": 1950
566
+ },
567
+ {
568
+ "epoch": 1.3820853743876838,
569
+ "grad_norm": 0.021400198340415955,
570
+ "learning_rate": 6.912908242612753e-06,
571
+ "loss": 0.0,
572
+ "step": 1975
573
+ },
574
+ {
575
+ "epoch": 1.3995801259622114,
576
+ "grad_norm": 0.0005948548787273467,
577
+ "learning_rate": 6.71850699844479e-06,
578
+ "loss": 0.0,
579
+ "step": 2000
580
+ },
581
+ {
582
+ "epoch": 1.417074877536739,
583
+ "grad_norm": 0.0006943101761862636,
584
+ "learning_rate": 6.524105754276828e-06,
585
+ "loss": 0.0,
586
+ "step": 2025
587
+ },
588
+ {
589
+ "epoch": 1.4345696291112666,
590
+ "grad_norm": 0.0013550578150898218,
591
+ "learning_rate": 6.329704510108865e-06,
592
+ "loss": 0.0,
593
+ "step": 2050
594
+ },
595
+ {
596
+ "epoch": 1.4520643806857942,
597
+ "grad_norm": 0.0002896255755331367,
598
+ "learning_rate": 6.135303265940903e-06,
599
+ "loss": 0.0,
600
+ "step": 2075
601
+ },
602
+ {
603
+ "epoch": 1.4695591322603219,
604
+ "grad_norm": 0.0011648598592728376,
605
+ "learning_rate": 5.940902021772939e-06,
606
+ "loss": 0.0001,
607
+ "step": 2100
608
+ },
609
+ {
610
+ "epoch": 1.4870538838348495,
611
+ "grad_norm": 0.020712416619062424,
612
+ "learning_rate": 5.746500777604978e-06,
613
+ "loss": 0.0,
614
+ "step": 2125
615
+ },
616
+ {
617
+ "epoch": 1.5045486354093773,
618
+ "grad_norm": 0.0005796013865619898,
619
+ "learning_rate": 5.5520995334370144e-06,
620
+ "loss": 0.0005,
621
+ "step": 2150
622
+ },
623
+ {
624
+ "epoch": 1.522043386983905,
625
+ "grad_norm": 0.014175205491483212,
626
+ "learning_rate": 5.357698289269052e-06,
627
+ "loss": 0.0015,
628
+ "step": 2175
629
+ },
630
+ {
631
+ "epoch": 1.5395381385584326,
632
+ "grad_norm": 7.142549991607666,
633
+ "learning_rate": 5.163297045101089e-06,
634
+ "loss": 0.0215,
635
+ "step": 2200
636
+ },
637
+ {
638
+ "epoch": 1.5570328901329602,
639
+ "grad_norm": 0.0004311289812903851,
640
+ "learning_rate": 4.968895800933126e-06,
641
+ "loss": 0.0,
642
+ "step": 2225
643
+ },
644
+ {
645
+ "epoch": 1.5745276417074878,
646
+ "grad_norm": 0.0007753855898045003,
647
+ "learning_rate": 4.774494556765164e-06,
648
+ "loss": 0.0,
649
+ "step": 2250
650
+ },
651
+ {
652
+ "epoch": 1.5920223932820154,
653
+ "grad_norm": 0.0002963803126476705,
654
+ "learning_rate": 4.5800933125972005e-06,
655
+ "loss": 0.0,
656
+ "step": 2275
657
+ },
658
+ {
659
+ "epoch": 1.609517144856543,
660
+ "grad_norm": 0.0010399603052064776,
661
+ "learning_rate": 4.385692068429238e-06,
662
+ "loss": 0.0,
663
+ "step": 2300
664
+ },
665
+ {
666
+ "epoch": 1.6270118964310707,
667
+ "grad_norm": 0.000952723843511194,
668
+ "learning_rate": 4.1912908242612755e-06,
669
+ "loss": 0.0,
670
+ "step": 2325
671
+ },
672
+ {
673
+ "epoch": 1.6445066480055983,
674
+ "grad_norm": 0.00023090622562449425,
675
+ "learning_rate": 3.996889580093313e-06,
676
+ "loss": 0.0,
677
+ "step": 2350
678
+ },
679
+ {
680
+ "epoch": 1.662001399580126,
681
+ "grad_norm": 0.008954511024057865,
682
+ "learning_rate": 3.80248833592535e-06,
683
+ "loss": 0.0003,
684
+ "step": 2375
685
+ },
686
+ {
687
+ "epoch": 1.6794961511546536,
688
+ "grad_norm": 0.0010821650503203273,
689
+ "learning_rate": 3.6080870917573873e-06,
690
+ "loss": 0.0,
691
+ "step": 2400
692
+ },
693
+ {
694
+ "epoch": 1.6969909027291812,
695
+ "grad_norm": 0.0006221202784217894,
696
+ "learning_rate": 3.413685847589425e-06,
697
+ "loss": 0.0214,
698
+ "step": 2425
699
+ },
700
+ {
701
+ "epoch": 1.7144856543037088,
702
+ "grad_norm": 0.004466580227017403,
703
+ "learning_rate": 3.219284603421462e-06,
704
+ "loss": 0.0,
705
+ "step": 2450
706
+ },
707
+ {
708
+ "epoch": 1.7319804058782364,
709
+ "grad_norm": 0.002296778140589595,
710
+ "learning_rate": 3.024883359253499e-06,
711
+ "loss": 0.0015,
712
+ "step": 2475
713
+ },
714
+ {
715
+ "epoch": 1.749475157452764,
716
+ "grad_norm": 0.00047575862845405936,
717
+ "learning_rate": 2.8304821150855366e-06,
718
+ "loss": 0.0,
719
+ "step": 2500
720
+ },
721
+ {
722
+ "epoch": 1.7669699090272917,
723
+ "grad_norm": 0.0030999884475022554,
724
+ "learning_rate": 2.6360808709175738e-06,
725
+ "loss": 0.0,
726
+ "step": 2525
727
+ },
728
+ {
729
+ "epoch": 1.7844646606018193,
730
+ "grad_norm": 0.04565088450908661,
731
+ "learning_rate": 2.4416796267496113e-06,
732
+ "loss": 0.0003,
733
+ "step": 2550
734
+ },
735
+ {
736
+ "epoch": 1.8019594121763471,
737
+ "grad_norm": 0.003935549408197403,
738
+ "learning_rate": 2.247278382581649e-06,
739
+ "loss": 0.0,
740
+ "step": 2575
741
+ },
742
+ {
743
+ "epoch": 1.8194541637508748,
744
+ "grad_norm": 0.0007768659852445126,
745
+ "learning_rate": 2.052877138413686e-06,
746
+ "loss": 0.0001,
747
+ "step": 2600
748
+ },
749
+ {
750
+ "epoch": 1.8369489153254024,
751
+ "grad_norm": 9.082518577575684,
752
+ "learning_rate": 1.8584758942457235e-06,
753
+ "loss": 0.0007,
754
+ "step": 2625
755
+ },
756
+ {
757
+ "epoch": 1.85444366689993,
758
+ "grad_norm": 0.00017582898726686835,
759
+ "learning_rate": 1.6640746500777608e-06,
760
+ "loss": 0.0,
761
+ "step": 2650
762
+ },
763
+ {
764
+ "epoch": 1.8719384184744576,
765
+ "grad_norm": 0.0005721878260374069,
766
+ "learning_rate": 1.4696734059097982e-06,
767
+ "loss": 0.0,
768
+ "step": 2675
769
+ },
770
+ {
771
+ "epoch": 1.8894331700489853,
772
+ "grad_norm": 0.0014160927385091782,
773
+ "learning_rate": 1.2752721617418353e-06,
774
+ "loss": 0.0,
775
+ "step": 2700
776
+ },
777
+ {
778
+ "epoch": 1.906927921623513,
779
+ "grad_norm": 0.0003491580719128251,
780
+ "learning_rate": 1.0808709175738726e-06,
781
+ "loss": 0.0,
782
+ "step": 2725
783
+ },
784
+ {
785
+ "epoch": 1.9244226731980407,
786
+ "grad_norm": 0.00031783856684342027,
787
+ "learning_rate": 8.864696734059098e-07,
788
+ "loss": 0.0,
789
+ "step": 2750
790
+ },
791
+ {
792
+ "epoch": 1.9419174247725683,
793
+ "grad_norm": 0.0011913523776456714,
794
+ "learning_rate": 6.920684292379472e-07,
795
+ "loss": 0.0001,
796
+ "step": 2775
797
+ },
798
+ {
799
+ "epoch": 1.959412176347096,
800
+ "grad_norm": 0.0027918724808841944,
801
+ "learning_rate": 4.976671850699845e-07,
802
+ "loss": 0.0,
803
+ "step": 2800
804
+ },
805
+ {
806
+ "epoch": 1.9769069279216236,
807
+ "grad_norm": 0.002089190762490034,
808
+ "learning_rate": 3.032659409020218e-07,
809
+ "loss": 0.0037,
810
+ "step": 2825
811
+ },
812
+ {
813
+ "epoch": 1.9944016794961512,
814
+ "grad_norm": 0.0003496000135783106,
815
+ "learning_rate": 1.088646967340591e-07,
816
+ "loss": 0.0,
817
+ "step": 2850
818
+ },
819
+ {
820
+ "epoch": 2.0,
821
+ "eval_exact_match": 99.7703,
822
+ "eval_f1": 99.8851,
823
+ "eval_loss": 6.235438195290044e-05,
824
+ "eval_runtime": 18.4334,
825
+ "eval_samples_per_second": 77.143,
826
+ "eval_steps_per_second": 9.656,
827
+ "step": 2858
828
+ }
829
+ ],
830
+ "logging_steps": 25,
831
+ "max_steps": 2858,
832
+ "num_input_tokens_seen": 0,
833
+ "num_train_epochs": 2,
834
+ "save_steps": 500,
835
+ "stateful_callbacks": {
836
+ "EarlyStoppingCallback": {
837
+ "args": {
838
+ "early_stopping_patience": 5,
839
+ "early_stopping_threshold": 0.01
840
+ },
841
+ "attributes": {
842
+ "early_stopping_patience_counter": 1
843
+ }
844
+ },
845
+ "TrainerControl": {
846
+ "args": {
847
+ "should_epoch_stop": false,
848
+ "should_evaluate": false,
849
+ "should_log": false,
850
+ "should_save": true,
851
+ "should_training_stop": true
852
+ },
853
+ "attributes": {}
854
+ }
855
+ },
856
+ "total_flos": 2986621929492480.0,
857
+ "train_batch_size": 4,
858
+ "trial_name": null,
859
+ "trial_params": null
860
+ }
checkpoint-2858/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e10234dc1d4393c789f6886e8c45d4fa1c50db25477b6f263223c0983b048889
3
+ size 5368
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "deepset/roberta-base-squad2",
3
+ "architectures": [
4
+ "RobertaForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "language": "english",
17
+ "layer_norm_eps": 1e-05,
18
+ "max_position_embeddings": 514,
19
+ "model_type": "roberta",
20
+ "name": "Roberta",
21
+ "num_attention_heads": 12,
22
+ "num_hidden_layers": 12,
23
+ "pad_token_id": 1,
24
+ "position_embedding_type": "absolute",
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.48.0",
27
+ "type_vocab_size": 1,
28
+ "use_cache": true,
29
+ "vocab_size": 50265
30
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d9d1bde8b7624d84887bf84e5395e3cc1556658d2a7677a1b32e7734e09fa24
3
+ size 496250232
runs/Apr09_07-21-59_d72aa199956d/events.out.tfevents.1744183320.d72aa199956d.7773.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:354d4503941f38c5d3d535ba68c23ba9742432dbf0255551b50590e0fcbccbeb
3
- size 4184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43cb6364399072b4e123fbbbce9a9776d97d2ee2c70d7a49a97db2367d6ab856
3
+ size 30289
runs/Apr09_07-21-59_d72aa199956d/events.out.tfevents.1744183846.d72aa199956d.7773.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b204507b6743200391c0ea5130bb5c20da17f7186637e6e4b2d4868c56707705
3
+ size 460
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": false,
47
+ "cls_token": "<s>",
48
+ "do_lower_case": false,
49
+ "eos_token": "</s>",
50
+ "errors": "replace",
51
+ "extra_special_tokens": {},
52
+ "full_tokenizer_file": null,
53
+ "mask_token": "<mask>",
54
+ "model_max_length": 512,
55
+ "pad_token": "<pad>",
56
+ "sep_token": "</s>",
57
+ "tokenizer_class": "RobertaTokenizer",
58
+ "trim_offsets": true,
59
+ "unk_token": "<unk>"
60
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e10234dc1d4393c789f6886e8c45d4fa1c50db25477b6f263223c0983b048889
3
+ size 5368
training_params.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "VOKulus/test",
3
+ "model": "deepset/roberta-base-squad2",
4
+ "lr": 2e-05,
5
+ "epochs": 2,
6
+ "max_seq_length": 512,
7
+ "max_doc_stride": 128,
8
+ "batch_size": 4,
9
+ "warmup_ratio": 0.1,
10
+ "gradient_accumulation": 1,
11
+ "optimizer": "adamw_torch",
12
+ "scheduler": "linear",
13
+ "weight_decay": 0.0,
14
+ "max_grad_norm": 1.0,
15
+ "seed": 42,
16
+ "train_split": "train",
17
+ "valid_split": "validation",
18
+ "text_column": "context",
19
+ "question_column": "question",
20
+ "answer_column": "answer",
21
+ "logging_steps": -1,
22
+ "project_name": "my-model-test-roberta",
23
+ "auto_find_batch_size": false,
24
+ "mixed_precision": "fp16",
25
+ "save_total_limit": 1,
26
+ "push_to_hub": true,
27
+ "eval_strategy": "epoch",
28
+ "username": "VOKulus",
29
+ "log": "tensorboard",
30
+ "early_stopping_patience": 5,
31
+ "early_stopping_threshold": 0.01
32
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff