nlparabic committed on
Commit
643136f
·
verified ·
1 Parent(s): a1fb50d

End of training

Browse files
all_results.json CHANGED
@@ -1,19 +1,19 @@
1
  {
2
  "epoch": 3.0,
3
  "eval_bleu": 0.2510920700175718,
4
- "eval_loss": 2.5200681686401367,
5
- "eval_rouge1": 0.45339177611687703,
6
- "eval_rouge2": 0.21369428759421394,
7
- "eval_rougeL": 0.39838760416988706,
8
- "eval_runtime": 29.7946,
9
  "eval_samples": 884,
10
- "eval_samples_per_second": 29.67,
11
- "eval_steps_per_second": 3.726,
12
- "perplexity": 12.42944393298918,
13
  "total_flos": 4151799742464000.0,
14
- "train_loss": 0.0,
15
- "train_runtime": 0.0936,
16
  "train_samples": 3531,
17
- "train_samples_per_second": 113198.697,
18
- "train_steps_per_second": 14169.874
19
  }
 
1
  {
2
  "epoch": 3.0,
3
  "eval_bleu": 0.2510920700175718,
4
+ "eval_loss": 2.5200693607330322,
5
+ "eval_rouge1": 0.45339120041307046,
6
+ "eval_rouge2": 0.21369562389429725,
7
+ "eval_rougeL": 0.3983722007696566,
8
+ "eval_runtime": 28.0888,
9
  "eval_samples": 884,
10
+ "eval_samples_per_second": 31.472,
11
+ "eval_steps_per_second": 3.952,
12
+ "perplexity": 12.42945875004982,
13
  "total_flos": 4151799742464000.0,
14
+ "train_loss": 3.14277273785115,
15
+ "train_runtime": 586.188,
16
  "train_samples": 3531,
17
+ "train_samples_per_second": 18.071,
18
+ "train_steps_per_second": 2.262
19
  }
egy_training_log.txt CHANGED
@@ -844,3 +844,5 @@ INFO:root:Epoch 2.0: Train Loss = 4.0723, Eval Loss = 2.958087921142578
844
  INFO:absl:Using default tokenizer.
845
  INFO:root:Epoch 3.0: Train Loss = 2.8055, Eval Loss = 2.606330394744873
846
  INFO:absl:Using default tokenizer.
 
 
 
844
  INFO:absl:Using default tokenizer.
845
  INFO:root:Epoch 3.0: Train Loss = 2.8055, Eval Loss = 2.606330394744873
846
  INFO:absl:Using default tokenizer.
847
+ INFO:__main__:*** Evaluate ***
848
+ INFO:absl:Using default tokenizer.
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 3.0,
3
  "eval_bleu": 0.2510920700175718,
4
- "eval_loss": 2.5200681686401367,
5
- "eval_rouge1": 0.45339177611687703,
6
- "eval_rouge2": 0.21369428759421394,
7
- "eval_rougeL": 0.39838760416988706,
8
- "eval_runtime": 29.7946,
9
  "eval_samples": 884,
10
- "eval_samples_per_second": 29.67,
11
- "eval_steps_per_second": 3.726,
12
- "perplexity": 12.42944393298918
13
  }
 
1
  {
2
  "epoch": 3.0,
3
  "eval_bleu": 0.2510920700175718,
4
+ "eval_loss": 2.5200693607330322,
5
+ "eval_rouge1": 0.45339120041307046,
6
+ "eval_rouge2": 0.21369562389429725,
7
+ "eval_rougeL": 0.3983722007696566,
8
+ "eval_runtime": 28.0888,
9
  "eval_samples": 884,
10
+ "eval_samples_per_second": 31.472,
11
+ "eval_steps_per_second": 3.952,
12
+ "perplexity": 12.42945875004982
13
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 4151799742464000.0,
4
- "train_loss": 0.0,
5
- "train_runtime": 0.0936,
6
  "train_samples": 3531,
7
- "train_samples_per_second": 113198.697,
8
- "train_steps_per_second": 14169.874
9
  }
 
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 4151799742464000.0,
4
+ "train_loss": 3.14277273785115,
5
+ "train_runtime": 586.188,
6
  "train_samples": 3531,
7
+ "train_samples_per_second": 18.071,
8
+ "train_steps_per_second": 2.262
9
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 2.5200681686401367,
3
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_egy/checkpoint-1326",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 1.6992121934890747,
14
  "learning_rate": 4.4200000000000004e-05,
15
  "loss": 4.0723,
16
  "step": 442
@@ -22,33 +22,33 @@
22
  "eval_rouge1": 0.3912678429308313,
23
  "eval_rouge2": 0.16481568948149655,
24
  "eval_rougeL": 0.32008248461173605,
25
- "eval_runtime": 28.6854,
26
- "eval_samples_per_second": 30.817,
27
- "eval_steps_per_second": 3.87,
28
  "step": 442
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 1.676450490951538,
33
  "learning_rate": 2.6755447941888623e-05,
34
  "loss": 2.8055,
35
  "step": 884
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_bleu": 0.24154541958578588,
40
- "eval_loss": 2.6063287258148193,
41
- "eval_rouge1": 0.43470063350740673,
42
  "eval_rouge2": 0.19710102219495218,
43
- "eval_rougeL": 0.3776728970262111,
44
- "eval_runtime": 28.998,
45
- "eval_samples_per_second": 30.485,
46
- "eval_steps_per_second": 3.828,
47
  "step": 884
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 1.5551185607910156,
52
  "learning_rate": 0.0,
53
  "loss": 2.5505,
54
  "step": 1326
@@ -56,23 +56,23 @@
56
  {
57
  "epoch": 3.0,
58
  "eval_bleu": 0.2510920700175718,
59
- "eval_loss": 2.5200681686401367,
60
- "eval_rouge1": 0.45339177611687703,
61
- "eval_rouge2": 0.21369428759421394,
62
- "eval_rougeL": 0.39838760416988706,
63
- "eval_runtime": 29.0706,
64
- "eval_samples_per_second": 30.409,
65
- "eval_steps_per_second": 3.818,
66
  "step": 1326
67
  },
68
  {
69
  "epoch": 3.0,
70
  "step": 1326,
71
  "total_flos": 4151799742464000.0,
72
- "train_loss": 0.0,
73
- "train_runtime": 0.0936,
74
- "train_samples_per_second": 113198.697,
75
- "train_steps_per_second": 14169.874
76
  }
77
  ],
78
  "logging_steps": 500,
 
1
  {
2
+ "best_metric": 2.5200693607330322,
3
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_egy/checkpoint-1326",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 1.6992123126983643,
14
  "learning_rate": 4.4200000000000004e-05,
15
  "loss": 4.0723,
16
  "step": 442
 
22
  "eval_rouge1": 0.3912678429308313,
23
  "eval_rouge2": 0.16481568948149655,
24
  "eval_rougeL": 0.32008248461173605,
25
+ "eval_runtime": 27.8657,
26
+ "eval_samples_per_second": 31.724,
27
+ "eval_steps_per_second": 3.983,
28
  "step": 442
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 1.676450252532959,
33
  "learning_rate": 2.6755447941888623e-05,
34
  "loss": 2.8055,
35
  "step": 884
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_bleu": 0.24154624358915514,
40
+ "eval_loss": 2.606330394744873,
41
+ "eval_rouge1": 0.43470628711649895,
42
  "eval_rouge2": 0.19710102219495218,
43
+ "eval_rougeL": 0.3776869994743499,
44
+ "eval_runtime": 28.1779,
45
+ "eval_samples_per_second": 31.372,
46
+ "eval_steps_per_second": 3.939,
47
  "step": 884
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 1.5551155805587769,
52
  "learning_rate": 0.0,
53
  "loss": 2.5505,
54
  "step": 1326
 
56
  {
57
  "epoch": 3.0,
58
  "eval_bleu": 0.2510920700175718,
59
+ "eval_loss": 2.5200693607330322,
60
+ "eval_rouge1": 0.45339120041307046,
61
+ "eval_rouge2": 0.21369562389429725,
62
+ "eval_rougeL": 0.3983722007696566,
63
+ "eval_runtime": 28.2169,
64
+ "eval_samples_per_second": 31.329,
65
+ "eval_steps_per_second": 3.934,
66
  "step": 1326
67
  },
68
  {
69
  "epoch": 3.0,
70
  "step": 1326,
71
  "total_flos": 4151799742464000.0,
72
+ "train_loss": 3.14277273785115,
73
+ "train_runtime": 586.188,
74
+ "train_samples_per_second": 18.071,
75
+ "train_steps_per_second": 2.262
76
  }
77
  ],
78
  "logging_steps": 500,