nlparabic committed (verified)
Commit 2879749 · 1 Parent(s): 45df639

End of training

README.md CHANGED
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
  
  This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.9003
- - Bleu: 0.3478
- - Rouge1: 0.6166
- - Rouge2: 0.3549
- - Rougel: 0.6125
+ - Loss: 0.8298
+ - Bleu: 0.3390
+ - Rouge1: 0.5962
+ - Rouge2: 0.3298
+ - Rougel: 0.5921
  
  ## Model description
  
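For context, a checkpoint produced by this run can be used like any other causal-LM repository on the Hub. The snippet below is a minimal usage sketch and is not part of the commit; the repository id is an assumption (the diff only shows the local output directory res_nw_eg), so substitute the real model id.

```python
# Minimal sketch (assumption: "nlparabic/res_nw_eg" stands in for the actual
# Hub repo id, which this diff does not state).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "nlparabic/res_nw_eg"  # hypothetical id; replace with the real one
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("اكتب جملة قصيرة:", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```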
all_results.json CHANGED
@@ -1,19 +1,19 @@
  {
- "epoch": 14.0,
- "eval_bleu": 0.24587227576979195,
- "eval_loss": 0.7315686941146851,
- "eval_rouge1": 0.6017197427075045,
- "eval_rouge2": 0.3518746485163118,
- "eval_rougeL": 0.5982542515796094,
- "eval_runtime": 47.0307,
- "eval_samples": 14212,
- "eval_samples_per_second": 302.185,
- "eval_steps_per_second": 37.784,
- "perplexity": 2.078338328926906,
- "total_flos": 5.1991496589312e+16,
- "train_loss": 0.09352104669935263,
- "train_runtime": 3380.5822,
- "train_samples": 56851,
- "train_samples_per_second": 336.339,
- "train_steps_per_second": 42.046
+ "epoch": 8.0,
+ "eval_bleu": 0.3389516198368023,
+ "eval_loss": 0.8298296928405762,
+ "eval_rouge1": 0.5961824107324037,
+ "eval_rouge2": 0.32984558195042607,
+ "eval_rougeL": 0.5921451862516953,
+ "eval_runtime": 46.8943,
+ "eval_samples": 14209,
+ "eval_samples_per_second": 303.001,
+ "eval_steps_per_second": 37.894,
+ "perplexity": 2.2929282049203015,
+ "total_flos": 2.9701587861504e+16,
+ "train_loss": 0.6060978588128408,
+ "train_runtime": 4864.2589,
+ "train_samples": 56836,
+ "train_samples_per_second": 233.688,
+ "train_steps_per_second": 29.213
  }
egy_training_log.txt CHANGED
@@ -160,3 +160,5 @@ INFO:root:Epoch 7.0: Train Loss = 0.4559, Eval Loss = 0.8647022843360901
  INFO:absl:Using default tokenizer.
  INFO:root:Epoch 8.0: Train Loss = 0.4104, Eval Loss = 0.8769952058792114
  INFO:absl:Using default tokenizer.
+ INFO:__main__:*** Evaluate ***
+ INFO:absl:Using default tokenizer.
eval_results.json CHANGED
@@ -1,13 +1,13 @@
  {
- "epoch": 14.0,
- "eval_bleu": 0.24587227576979195,
- "eval_loss": 0.7315686941146851,
- "eval_rouge1": 0.6017197427075045,
- "eval_rouge2": 0.3518746485163118,
- "eval_rougeL": 0.5982542515796094,
- "eval_runtime": 47.0307,
- "eval_samples": 14212,
- "eval_samples_per_second": 302.185,
- "eval_steps_per_second": 37.784,
- "perplexity": 2.078338328926906
+ "epoch": 8.0,
+ "eval_bleu": 0.3389516198368023,
+ "eval_loss": 0.8298296928405762,
+ "eval_rouge1": 0.5961824107324037,
+ "eval_rouge2": 0.32984558195042607,
+ "eval_rougeL": 0.5921451862516953,
+ "eval_runtime": 46.8943,
+ "eval_samples": 14209,
+ "eval_samples_per_second": 303.001,
+ "eval_steps_per_second": 37.894,
+ "perplexity": 2.2929282049203015
  }
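The perplexity field in eval_results.json is the exponential of the mean evaluation loss, and the updated numbers above are consistent with that. A small check (not part of the commit):

```python
# Sanity check: perplexity = exp(eval_loss), using values from this commit.
import math

eval_loss = 0.8298296928405762
print(math.exp(eval_loss))  # ~2.2929282049203015, the reported "perplexity"
```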
train_results.json CHANGED
@@ -1,9 +1,9 @@
  {
- "epoch": 14.0,
- "total_flos": 5.1991496589312e+16,
- "train_loss": 0.09352104669935263,
- "train_runtime": 3380.5822,
- "train_samples": 56851,
- "train_samples_per_second": 336.339,
- "train_steps_per_second": 42.046
+ "epoch": 8.0,
+ "total_flos": 2.9701587861504e+16,
+ "train_loss": 0.6060978588128408,
+ "train_runtime": 4864.2589,
+ "train_samples": 56836,
+ "train_samples_per_second": 233.688,
+ "train_steps_per_second": 29.213
  }
train_vs_val_loss.png CHANGED
trainer_state.json CHANGED
@@ -1,291 +1,177 @@
  {
- "best_metric": 0.7315686941146851,
- "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_eg/checkpoint-28428",
- "epoch": 14.0,
+ "best_metric": 0.8298296928405762,
+ "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_eg/checkpoint-21315",
+ "epoch": 8.0,
  "eval_steps": 500,
- "global_step": 99498,
+ "global_step": 56840,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
  "epoch": 1.0,
- "grad_norm": 2.742945671081543,
- "learning_rate": 4.7667678621858235e-05,
- "loss": 1.1436,
- "step": 7107
+ "grad_norm": 1.2983179092407227,
+ "learning_rate": 4.766772598870057e-05,
+ "loss": 1.1362,
+ "step": 7105
  },
  {
  "epoch": 1.0,
- "eval_bleu": 0.1900415894207328,
- "eval_loss": 0.8277140259742737,
- "eval_rouge1": 0.5211536867388353,
- "eval_rouge2": 0.2576275131704426,
- "eval_rougeL": 0.5169189427573101,
- "eval_runtime": 204.5959,
- "eval_samples_per_second": 69.464,
- "eval_steps_per_second": 8.685,
- "step": 7107
+ "eval_bleu": 0.3024315492316285,
+ "eval_loss": 0.9122783541679382,
+ "eval_rouge1": 0.5275815926361394,
+ "eval_rouge2": 0.250835305590573,
+ "eval_rougeL": 0.5227642919859343,
+ "eval_runtime": 82.2039,
+ "eval_samples_per_second": 172.851,
+ "eval_steps_per_second": 21.617,
+ "step": 7105
  },
  {
  "epoch": 2.0,
- "grad_norm": 3.031801223754883,
- "learning_rate": 4.515885343123411e-05,
- "loss": 0.7508,
- "step": 14214
+ "grad_norm": 1.6412582397460938,
+ "learning_rate": 4.515889830508475e-05,
+ "loss": 0.7501,
+ "step": 14210
  },
  {
  "epoch": 2.0,
- "eval_bleu": 0.22138684401610842,
- "eval_loss": 0.7543078064918518,
- "eval_rouge1": 0.5674397247471176,
- "eval_rouge2": 0.3108337383535441,
- "eval_rougeL": 0.5636106781794015,
- "eval_runtime": 171.1246,
- "eval_samples_per_second": 83.051,
- "eval_steps_per_second": 10.384,
- "step": 14214
+ "eval_bleu": 0.3299343390399215,
+ "eval_loss": 0.8436459302902222,
+ "eval_rouge1": 0.5753620498690527,
+ "eval_rouge2": 0.30552900678559713,
+ "eval_rougeL": 0.5712795722916149,
+ "eval_runtime": 47.3241,
+ "eval_samples_per_second": 300.248,
+ "eval_steps_per_second": 37.55,
+ "step": 14210
  },
  {
  "epoch": 3.0,
- "grad_norm": 1.8590487241744995,
- "learning_rate": 4.265002824061e-05,
- "loss": 0.6471,
- "step": 21321
+ "grad_norm": 1.4929105043411255,
+ "learning_rate": 4.265007062146893e-05,
+ "loss": 0.6462,
+ "step": 21315
  },
  {
  "epoch": 3.0,
- "eval_bleu": 0.2374960454489342,
- "eval_loss": 0.7337948083877563,
- "eval_rouge1": 0.5880985827608463,
- "eval_rouge2": 0.33558513842625187,
- "eval_rougeL": 0.5844518671510625,
- "eval_runtime": 68.7002,
- "eval_samples_per_second": 206.87,
- "eval_steps_per_second": 25.866,
- "step": 21321
+ "eval_bleu": 0.3389516198368023,
+ "eval_loss": 0.8298296928405762,
+ "eval_rouge1": 0.5961824107324037,
+ "eval_rouge2": 0.32984558195042607,
+ "eval_rougeL": 0.5921451862516953,
+ "eval_runtime": 46.7775,
+ "eval_samples_per_second": 303.757,
+ "eval_steps_per_second": 37.988,
+ "step": 21315
  },
  {
  "epoch": 4.0,
- "grad_norm": 1.3944027423858643,
- "learning_rate": 4.0141203049985884e-05,
- "loss": 0.5713,
- "step": 28428
+ "grad_norm": 2.472024440765381,
+ "learning_rate": 4.014124293785311e-05,
+ "loss": 0.5705,
+ "step": 28420
  },
  {
  "epoch": 4.0,
- "eval_bleu": 0.24587227576979195,
- "eval_loss": 0.7315686941146851,
- "eval_rouge1": 0.6017197427075045,
- "eval_rouge2": 0.3518746485163118,
- "eval_rougeL": 0.5982542515796094,
- "eval_runtime": 62.0721,
- "eval_samples_per_second": 228.959,
- "eval_steps_per_second": 28.628,
- "step": 28428
+ "eval_bleu": 0.3443865327631572,
+ "eval_loss": 0.8327584862709045,
+ "eval_rouge1": 0.6048121716287991,
+ "eval_rouge2": 0.3411757857050619,
+ "eval_rougeL": 0.6010600637633374,
+ "eval_runtime": 51.9902,
+ "eval_samples_per_second": 273.301,
+ "eval_steps_per_second": 34.18,
+ "step": 28420
  },
  {
  "epoch": 5.0,
- "grad_norm": 2.199220657348633,
- "learning_rate": 3.763237785936176e-05,
- "loss": 0.5097,
- "step": 35535
+ "grad_norm": 1.5785757303237915,
+ "learning_rate": 3.763241525423729e-05,
+ "loss": 0.5087,
+ "step": 35525
  },
  {
  "epoch": 5.0,
- "eval_bleu": 0.24748155317226092,
- "eval_loss": 0.7390380501747131,
- "eval_rouge1": 0.6058102682046419,
- "eval_rouge2": 0.357170685615976,
- "eval_rougeL": 0.6021635755679425,
- "eval_runtime": 67.8747,
- "eval_samples_per_second": 209.386,
- "eval_steps_per_second": 26.181,
- "step": 35535
+ "eval_bleu": 0.3480087983574886,
+ "eval_loss": 0.8406437635421753,
+ "eval_rouge1": 0.6133106901142944,
+ "eval_rouge2": 0.3512185978691529,
+ "eval_rougeL": 0.6095993795603691,
+ "eval_runtime": 46.8243,
+ "eval_samples_per_second": 303.453,
+ "eval_steps_per_second": 37.95,
+ "step": 35525
  },
  {
  "epoch": 6.0,
- "grad_norm": 2.055725574493408,
- "learning_rate": 3.512355266873765e-05,
- "loss": 0.4573,
- "step": 42642
+ "grad_norm": 1.7464922666549683,
+ "learning_rate": 3.5123587570621466e-05,
+ "loss": 0.4559,
+ "step": 42630
  },
  {
  "epoch": 6.0,
- "eval_bleu": 0.25030630377831276,
- "eval_loss": 0.748293399810791,
- "eval_rouge1": 0.6103116816448397,
- "eval_rouge2": 0.361846050958361,
- "eval_rougeL": 0.6066395364597333,
- "eval_runtime": 56.4418,
- "eval_samples_per_second": 251.799,
- "eval_steps_per_second": 31.484,
- "step": 42642
+ "eval_bleu": 0.34779219505501374,
+ "eval_loss": 0.8647022843360901,
+ "eval_rouge1": 0.6132791748034587,
+ "eval_rouge2": 0.35198863487088,
+ "eval_rougeL": 0.6093736659279629,
+ "eval_runtime": 46.987,
+ "eval_samples_per_second": 302.403,
+ "eval_steps_per_second": 37.819,
+ "step": 42630
  },
  {
  "epoch": 7.0,
- "grad_norm": 1.6595733165740967,
- "learning_rate": 3.2614727478113526e-05,
- "loss": 0.4118,
- "step": 49749
+ "grad_norm": 2.2738301753997803,
+ "learning_rate": 3.261475988700565e-05,
+ "loss": 0.4104,
+ "step": 49735
  },
  {
  "epoch": 7.0,
- "eval_bleu": 0.2494244558337241,
- "eval_loss": 0.7635838389396667,
- "eval_rouge1": 0.610621109140437,
- "eval_rouge2": 0.3633959713058441,
- "eval_rougeL": 0.6069537363647842,
- "eval_runtime": 173.9311,
- "eval_samples_per_second": 81.711,
- "eval_steps_per_second": 10.217,
- "step": 49749
+ "eval_bleu": 0.3484641332073553,
+ "eval_loss": 0.8769952058792114,
+ "eval_rouge1": 0.6147660525828296,
+ "eval_rouge2": 0.3532593327836824,
+ "eval_rougeL": 0.6107018352641345,
+ "eval_runtime": 47.1718,
+ "eval_samples_per_second": 301.218,
+ "eval_steps_per_second": 37.671,
+ "step": 49735
  },
  {
  "epoch": 8.0,
- "grad_norm": 3.863671064376831,
- "learning_rate": 3.010590228748941e-05,
- "loss": 0.3725,
- "step": 56856
+ "grad_norm": 1.9538367986679077,
+ "learning_rate": 3.010593220338983e-05,
+ "loss": 0.3708,
+ "step": 56840
  },
  {
  "epoch": 8.0,
- "eval_bleu": 0.25065847486647275,
- "eval_loss": 0.7796261310577393,
- "eval_rouge1": 0.6126587801190159,
- "eval_rouge2": 0.3659624175392553,
- "eval_rougeL": 0.6088959046619336,
- "eval_runtime": 170.86,
- "eval_samples_per_second": 83.179,
- "eval_steps_per_second": 10.4,
- "step": 56856
+ "eval_bleu": 0.3478035270510471,
+ "eval_loss": 0.9003333449363708,
+ "eval_rouge1": 0.6165900788913368,
+ "eval_rouge2": 0.3548552782268418,
+ "eval_rougeL": 0.6124632709173936,
+ "eval_runtime": 47.008,
+ "eval_samples_per_second": 302.268,
+ "eval_steps_per_second": 37.802,
+ "step": 56840
  },
  {
- "epoch": 9.0,
- "grad_norm": 3.546931266784668,
- "learning_rate": 2.7597077096865293e-05,
- "loss": 0.3375,
- "step": 63963
- },
- {
- "epoch": 9.0,
- "eval_bleu": 0.24908190761452426,
- "eval_loss": 0.7973926663398743,
- "eval_rouge1": 0.6111967178899901,
- "eval_rouge2": 0.36536691181853787,
- "eval_rougeL": 0.6074289902749841,
- "eval_runtime": 173.0755,
- "eval_samples_per_second": 82.114,
- "eval_steps_per_second": 10.267,
- "step": 63963
- },
- {
- "epoch": 10.0,
- "grad_norm": 2.328486442565918,
- "learning_rate": 2.5088251906241178e-05,
- "loss": 0.3074,
- "step": 71070
- },
- {
- "epoch": 10.0,
- "eval_bleu": 0.24784142265151649,
- "eval_loss": 0.8155524134635925,
- "eval_rouge1": 0.6101301030440347,
- "eval_rouge2": 0.364415924488246,
- "eval_rougeL": 0.606258270290786,
- "eval_runtime": 82.2965,
- "eval_samples_per_second": 172.693,
- "eval_steps_per_second": 21.593,
- "step": 71070
- },
- {
- "epoch": 11.0,
- "grad_norm": 2.1735293865203857,
- "learning_rate": 2.2579426715617057e-05,
- "loss": 0.2813,
- "step": 78177
- },
- {
- "epoch": 11.0,
- "eval_bleu": 0.24877766407334076,
- "eval_loss": 0.8325821757316589,
- "eval_rouge1": 0.6108646154589945,
- "eval_rouge2": 0.36567755520636375,
- "eval_rougeL": 0.6071552064184781,
- "eval_runtime": 46.9697,
- "eval_samples_per_second": 302.578,
- "eval_steps_per_second": 37.833,
- "step": 78177
- },
- {
- "epoch": 12.0,
- "grad_norm": 2.380600929260254,
- "learning_rate": 2.0070601524992942e-05,
- "loss": 0.2586,
- "step": 85284
- },
- {
- "epoch": 12.0,
- "eval_bleu": 0.24613125712387748,
- "eval_loss": 0.8498404026031494,
- "eval_rouge1": 0.6082478770786404,
- "eval_rouge2": 0.3630077370066766,
- "eval_rougeL": 0.604462114793365,
- "eval_runtime": 46.8897,
- "eval_samples_per_second": 303.094,
- "eval_steps_per_second": 37.897,
- "step": 85284
- },
- {
- "epoch": 13.0,
- "grad_norm": 2.3239755630493164,
- "learning_rate": 1.7561776334368824e-05,
- "loss": 0.2392,
- "step": 92391
- },
- {
- "epoch": 13.0,
- "eval_bleu": 0.24701514132558042,
- "eval_loss": 0.867546021938324,
- "eval_rouge1": 0.6094521129523778,
- "eval_rouge2": 0.364158573223837,
- "eval_rougeL": 0.6054673467586391,
- "eval_runtime": 169.8152,
- "eval_samples_per_second": 83.691,
- "eval_steps_per_second": 10.464,
- "step": 92391
- },
- {
- "epoch": 14.0,
- "grad_norm": 2.4565744400024414,
- "learning_rate": 1.5052951143744705e-05,
- "loss": 0.2227,
- "step": 99498
- },
- {
- "epoch": 14.0,
- "eval_bleu": 0.24626598003060837,
- "eval_loss": 0.8826896548271179,
- "eval_rouge1": 0.6087178921955774,
- "eval_rouge2": 0.36314063359901005,
- "eval_rougeL": 0.6048623164787159,
- "eval_runtime": 170.1544,
- "eval_samples_per_second": 83.524,
- "eval_steps_per_second": 10.443,
- "step": 99498
- },
- {
- "epoch": 14.0,
- "step": 99498,
- "total_flos": 5.1991496589312e+16,
- "train_loss": 0.09352104669935263,
- "train_runtime": 3380.5822,
- "train_samples_per_second": 336.339,
- "train_steps_per_second": 42.046
+ "epoch": 8.0,
+ "step": 56840,
+ "total_flos": 2.9701587861504e+16,
+ "train_loss": 0.6060978588128408,
+ "train_runtime": 4864.2589,
+ "train_samples_per_second": 233.688,
+ "train_steps_per_second": 29.213
  }
  ],
  "logging_steps": 500,
- "max_steps": 142140,
+ "max_steps": 142100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
@@ -310,7 +196,7 @@
  "attributes": {}
  }
  },
- "total_flos": 5.1991496589312e+16,
+ "total_flos": 2.9701587861504e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null