fpadovani commited on
Commit
1cd0c2e
·
verified ·
1 Parent(s): b59267a

Training in progress, step 32000, checkpoint

Browse files
Files changed (1) hide show
  1. checkpoint-32000/trainer_state.json +48 -48
checkpoint-32000/trainer_state.json CHANGED
@@ -11,9 +11,9 @@
11
  {
12
  "epoch": 2.5,
13
  "eval_loss": 7.232339382171631,
14
- "eval_runtime": 1.6226,
15
- "eval_samples_per_second": 1272.646,
16
- "eval_steps_per_second": 80.118,
17
  "step": 2000
18
  },
19
  {
@@ -26,17 +26,17 @@
26
  {
27
  "epoch": 5.0,
28
  "eval_loss": 5.976784706115723,
29
- "eval_runtime": 1.6382,
30
- "eval_samples_per_second": 1260.5,
31
- "eval_steps_per_second": 79.353,
32
  "step": 4000
33
  },
34
  {
35
  "epoch": 7.5,
36
  "eval_loss": 5.826813220977783,
37
- "eval_runtime": 3.0373,
38
- "eval_samples_per_second": 679.889,
39
- "eval_steps_per_second": 42.802,
40
  "step": 6000
41
  },
42
  {
@@ -49,17 +49,17 @@
49
  {
50
  "epoch": 10.0,
51
  "eval_loss": 5.744414806365967,
52
- "eval_runtime": 3.0316,
53
- "eval_samples_per_second": 681.153,
54
- "eval_steps_per_second": 42.881,
55
  "step": 8000
56
  },
57
  {
58
  "epoch": 12.5,
59
  "eval_loss": 5.670751571655273,
60
- "eval_runtime": 3.1272,
61
- "eval_samples_per_second": 660.333,
62
- "eval_steps_per_second": 41.571,
63
  "step": 10000
64
  },
65
  {
@@ -72,17 +72,17 @@
72
  {
73
  "epoch": 15.0,
74
  "eval_loss": 5.57132625579834,
75
- "eval_runtime": 3.0078,
76
- "eval_samples_per_second": 686.558,
77
- "eval_steps_per_second": 43.222,
78
  "step": 12000
79
  },
80
  {
81
  "epoch": 17.5,
82
  "eval_loss": 5.526998043060303,
83
- "eval_runtime": 3.0343,
84
- "eval_samples_per_second": 680.548,
85
- "eval_steps_per_second": 42.843,
86
  "step": 14000
87
  },
88
  {
@@ -95,17 +95,17 @@
95
  {
96
  "epoch": 20.0,
97
  "eval_loss": 5.448328971862793,
98
- "eval_runtime": 3.0113,
99
- "eval_samples_per_second": 685.757,
100
- "eval_steps_per_second": 43.171,
101
  "step": 16000
102
  },
103
  {
104
  "epoch": 22.5,
105
  "eval_loss": 5.3926005363464355,
106
- "eval_runtime": 3.0569,
107
- "eval_samples_per_second": 675.527,
108
- "eval_steps_per_second": 42.527,
109
  "step": 18000
110
  },
111
  {
@@ -118,17 +118,17 @@
118
  {
119
  "epoch": 25.0,
120
  "eval_loss": 5.231536388397217,
121
- "eval_runtime": 2.8975,
122
- "eval_samples_per_second": 712.671,
123
- "eval_steps_per_second": 44.865,
124
  "step": 20000
125
  },
126
  {
127
  "epoch": 27.5,
128
  "eval_loss": 4.905914306640625,
129
- "eval_runtime": 3.0834,
130
- "eval_samples_per_second": 669.706,
131
- "eval_steps_per_second": 42.161,
132
  "step": 22000
133
  },
134
  {
@@ -141,17 +141,17 @@
141
  {
142
  "epoch": 30.0,
143
  "eval_loss": 4.167966842651367,
144
- "eval_runtime": 2.6664,
145
- "eval_samples_per_second": 774.461,
146
- "eval_steps_per_second": 48.755,
147
  "step": 24000
148
  },
149
  {
150
  "epoch": 32.5,
151
  "eval_loss": 3.6409149169921875,
152
- "eval_runtime": 2.5685,
153
- "eval_samples_per_second": 803.957,
154
- "eval_steps_per_second": 50.612,
155
  "step": 26000
156
  },
157
  {
@@ -164,17 +164,17 @@
164
  {
165
  "epoch": 35.0,
166
  "eval_loss": 3.2063941955566406,
167
- "eval_runtime": 2.8723,
168
- "eval_samples_per_second": 718.927,
169
- "eval_steps_per_second": 45.259,
170
  "step": 28000
171
  },
172
  {
173
  "epoch": 37.5,
174
  "eval_loss": 3.0010480880737305,
175
- "eval_runtime": 2.9312,
176
- "eval_samples_per_second": 704.488,
177
- "eval_steps_per_second": 44.35,
178
  "step": 30000
179
  },
180
  {
@@ -187,9 +187,9 @@
187
  {
188
  "epoch": 40.0,
189
  "eval_loss": 2.850881338119507,
190
- "eval_runtime": 3.0495,
191
- "eval_samples_per_second": 677.15,
192
- "eval_steps_per_second": 42.629,
193
  "step": 32000
194
  }
195
  ],
 
11
  {
12
  "epoch": 2.5,
13
  "eval_loss": 7.232339382171631,
14
+ "eval_runtime": 3.1848,
15
+ "eval_samples_per_second": 648.395,
16
+ "eval_steps_per_second": 40.819,
17
  "step": 2000
18
  },
19
  {
 
26
  {
27
  "epoch": 5.0,
28
  "eval_loss": 5.976784706115723,
29
+ "eval_runtime": 3.0375,
30
+ "eval_samples_per_second": 679.836,
31
+ "eval_steps_per_second": 42.798,
32
  "step": 4000
33
  },
34
  {
35
  "epoch": 7.5,
36
  "eval_loss": 5.826813220977783,
37
+ "eval_runtime": 2.9574,
38
+ "eval_samples_per_second": 698.238,
39
+ "eval_steps_per_second": 43.957,
40
  "step": 6000
41
  },
42
  {
 
49
  {
50
  "epoch": 10.0,
51
  "eval_loss": 5.744414806365967,
52
+ "eval_runtime": 2.5747,
53
+ "eval_samples_per_second": 802.048,
54
+ "eval_steps_per_second": 50.492,
55
  "step": 8000
56
  },
57
  {
58
  "epoch": 12.5,
59
  "eval_loss": 5.670751571655273,
60
+ "eval_runtime": 1.618,
61
+ "eval_samples_per_second": 1276.247,
62
+ "eval_steps_per_second": 80.345,
63
  "step": 10000
64
  },
65
  {
 
72
  {
73
  "epoch": 15.0,
74
  "eval_loss": 5.57132625579834,
75
+ "eval_runtime": 2.872,
76
+ "eval_samples_per_second": 719.021,
77
+ "eval_steps_per_second": 45.265,
78
  "step": 12000
79
  },
80
  {
81
  "epoch": 17.5,
82
  "eval_loss": 5.526998043060303,
83
+ "eval_runtime": 3.0079,
84
+ "eval_samples_per_second": 686.536,
85
+ "eval_steps_per_second": 43.22,
86
  "step": 14000
87
  },
88
  {
 
95
  {
96
  "epoch": 20.0,
97
  "eval_loss": 5.448328971862793,
98
+ "eval_runtime": 2.9966,
99
+ "eval_samples_per_second": 689.104,
100
+ "eval_steps_per_second": 43.382,
101
  "step": 16000
102
  },
103
  {
104
  "epoch": 22.5,
105
  "eval_loss": 5.3926005363464355,
106
+ "eval_runtime": 3.0264,
107
+ "eval_samples_per_second": 682.332,
108
+ "eval_steps_per_second": 42.955,
109
  "step": 18000
110
  },
111
  {
 
118
  {
119
  "epoch": 25.0,
120
  "eval_loss": 5.231536388397217,
121
+ "eval_runtime": 3.0403,
122
+ "eval_samples_per_second": 679.22,
123
+ "eval_steps_per_second": 42.76,
124
  "step": 20000
125
  },
126
  {
127
  "epoch": 27.5,
128
  "eval_loss": 4.905914306640625,
129
+ "eval_runtime": 2.9951,
130
+ "eval_samples_per_second": 689.451,
131
+ "eval_steps_per_second": 43.404,
132
  "step": 22000
133
  },
134
  {
 
141
  {
142
  "epoch": 30.0,
143
  "eval_loss": 4.167966842651367,
144
+ "eval_runtime": 2.4346,
145
+ "eval_samples_per_second": 848.205,
146
+ "eval_steps_per_second": 53.398,
147
  "step": 24000
148
  },
149
  {
150
  "epoch": 32.5,
151
  "eval_loss": 3.6409149169921875,
152
+ "eval_runtime": 2.8198,
153
+ "eval_samples_per_second": 732.313,
154
+ "eval_steps_per_second": 46.102,
155
  "step": 26000
156
  },
157
  {
 
164
  {
165
  "epoch": 35.0,
166
  "eval_loss": 3.2063941955566406,
167
+ "eval_runtime": 2.8452,
168
+ "eval_samples_per_second": 725.772,
169
+ "eval_steps_per_second": 45.69,
170
  "step": 28000
171
  },
172
  {
173
  "epoch": 37.5,
174
  "eval_loss": 3.0010480880737305,
175
+ "eval_runtime": 3.0576,
176
+ "eval_samples_per_second": 675.356,
177
+ "eval_steps_per_second": 42.516,
178
  "step": 30000
179
  },
180
  {
 
187
  {
188
  "epoch": 40.0,
189
  "eval_loss": 2.850881338119507,
190
+ "eval_runtime": 3.0048,
191
+ "eval_samples_per_second": 687.232,
192
+ "eval_steps_per_second": 43.264,
193
  "step": 32000
194
  }
195
  ],