ryanmarten commited on
Commit
ef5714a
·
verified ·
1 Parent(s): 7c2fd01

Training in progress, epoch 2

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2446a8c7b141e438691184c2a5afa46ddf90568e5ddf72b04f6c582eeb5ea8b7
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ecfe7c7a460363e58b26f97818d9809ed2fa932926e332c1bf3363456b10c8e
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d9efdfd55d6747c48d16b9134e0259c9abd7a31d3582c56feddb207db932bf8
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9859578f61ee847a0428f0745fdead6dfd1566f77dc6133aeb5a44dc864d852
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86b7d65cc73b91ce6a994d4c76ce7104d5f80b46edc862113be4f32717790c1d
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c2c0aa8cf7287002f4285a137ae031c466f8277e41b700cc0f655d2f38fe242
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b0d68ee8e6e00975260654a8609338114b776db4e8bf8d64556179bd77c1be8
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e274f04aecea9105c0700b3b2cb4f16864a5844151b05c0cfbf1631903bb948b
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -159,3 +159,81 @@
159
  {"current_steps": 159, "total_steps": 390, "loss": 0.2808, "lr": 2.9531374400426158e-05, "epoch": 2.0352, "percentage": 40.77, "elapsed_time": "3:40:42", "remaining_time": "5:20:39"}
160
  {"current_steps": 160, "total_steps": 390, "loss": 0.2833, "lr": 2.9373622017294075e-05, "epoch": 2.048, "percentage": 41.03, "elapsed_time": "3:42:12", "remaining_time": "5:19:26"}
161
  {"current_steps": 161, "total_steps": 390, "loss": 0.2773, "lr": 2.9215118720423375e-05, "epoch": 2.0608, "percentage": 41.28, "elapsed_time": "3:43:33", "remaining_time": "5:17:59"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  {"current_steps": 159, "total_steps": 390, "loss": 0.2808, "lr": 2.9531374400426158e-05, "epoch": 2.0352, "percentage": 40.77, "elapsed_time": "3:40:42", "remaining_time": "5:20:39"}
160
  {"current_steps": 160, "total_steps": 390, "loss": 0.2833, "lr": 2.9373622017294075e-05, "epoch": 2.048, "percentage": 41.03, "elapsed_time": "3:42:12", "remaining_time": "5:19:26"}
161
  {"current_steps": 161, "total_steps": 390, "loss": 0.2773, "lr": 2.9215118720423375e-05, "epoch": 2.0608, "percentage": 41.28, "elapsed_time": "3:43:33", "remaining_time": "5:17:59"}
162
+ {"current_steps": 162, "total_steps": 390, "loss": 0.2647, "lr": 2.9055877207392752e-05, "epoch": 2.0736, "percentage": 41.54, "elapsed_time": "3:44:49", "remaining_time": "5:16:24"}
163
+ {"current_steps": 163, "total_steps": 390, "loss": 0.2479, "lr": 2.8895910234918828e-05, "epoch": 2.0864, "percentage": 41.79, "elapsed_time": "3:45:52", "remaining_time": "5:14:34"}
164
+ {"current_steps": 164, "total_steps": 390, "loss": 0.285, "lr": 2.873523061783426e-05, "epoch": 2.0992, "percentage": 42.05, "elapsed_time": "3:47:23", "remaining_time": "5:13:20"}
165
+ {"current_steps": 165, "total_steps": 390, "loss": 0.279, "lr": 2.8573851228061084e-05, "epoch": 2.112, "percentage": 42.31, "elapsed_time": "3:48:40", "remaining_time": "5:11:49"}
166
+ {"current_steps": 166, "total_steps": 390, "loss": 0.2526, "lr": 2.8411784993579633e-05, "epoch": 2.1248, "percentage": 42.56, "elapsed_time": "3:49:50", "remaining_time": "5:10:08"}
167
+ {"current_steps": 167, "total_steps": 390, "loss": 0.2704, "lr": 2.8249044897392814e-05, "epoch": 2.1376, "percentage": 42.82, "elapsed_time": "3:51:25", "remaining_time": "5:09:01"}
168
+ {"current_steps": 168, "total_steps": 390, "loss": 0.2689, "lr": 2.80856439764861e-05, "epoch": 2.1504, "percentage": 43.08, "elapsed_time": "3:52:59", "remaining_time": "5:07:53"}
169
+ {"current_steps": 169, "total_steps": 390, "loss": 0.2731, "lr": 2.792159532078314e-05, "epoch": 2.1632, "percentage": 43.33, "elapsed_time": "3:54:29", "remaining_time": "5:06:38"}
170
+ {"current_steps": 170, "total_steps": 390, "loss": 0.2856, "lr": 2.77569120720971e-05, "epoch": 2.176, "percentage": 43.59, "elapsed_time": "3:56:13", "remaining_time": "5:05:42"}
171
+ {"current_steps": 171, "total_steps": 390, "loss": 0.2808, "lr": 2.7591607423077932e-05, "epoch": 2.1888, "percentage": 43.85, "elapsed_time": "3:57:36", "remaining_time": "5:04:18"}
172
+ {"current_steps": 172, "total_steps": 390, "loss": 0.2346, "lr": 2.7425694616155474e-05, "epoch": 2.2016, "percentage": 44.1, "elapsed_time": "3:58:39", "remaining_time": "5:02:28"}
173
+ {"current_steps": 173, "total_steps": 390, "loss": 0.2695, "lr": 2.7259186942478656e-05, "epoch": 2.2144, "percentage": 44.36, "elapsed_time": "4:00:00", "remaining_time": "5:01:02"}
174
+ {"current_steps": 174, "total_steps": 390, "loss": 0.2791, "lr": 2.7092097740850712e-05, "epoch": 2.2272, "percentage": 44.62, "elapsed_time": "4:01:24", "remaining_time": "4:59:40"}
175
+ {"current_steps": 175, "total_steps": 390, "loss": 0.2665, "lr": 2.692444039666066e-05, "epoch": 2.24, "percentage": 44.87, "elapsed_time": "4:02:46", "remaining_time": "4:58:15"}
176
+ {"current_steps": 176, "total_steps": 390, "loss": 0.2829, "lr": 2.6756228340810946e-05, "epoch": 2.2528, "percentage": 45.13, "elapsed_time": "4:04:11", "remaining_time": "4:56:54"}
177
+ {"current_steps": 177, "total_steps": 390, "loss": 0.2634, "lr": 2.6587475048641596e-05, "epoch": 2.2656, "percentage": 45.38, "elapsed_time": "4:05:37", "remaining_time": "4:55:34"}
178
+ {"current_steps": 178, "total_steps": 390, "loss": 0.3131, "lr": 2.6418194038850634e-05, "epoch": 2.2784, "percentage": 45.64, "elapsed_time": "4:07:05", "remaining_time": "4:54:16"}
179
+ {"current_steps": 179, "total_steps": 390, "loss": 0.2644, "lr": 2.624839887241115e-05, "epoch": 2.2912, "percentage": 45.9, "elapsed_time": "4:08:29", "remaining_time": "4:52:54"}
180
+ {"current_steps": 180, "total_steps": 390, "loss": 0.2578, "lr": 2.607810315148494e-05, "epoch": 2.304, "percentage": 46.15, "elapsed_time": "4:09:54", "remaining_time": "4:51:33"}
181
+ {"current_steps": 181, "total_steps": 390, "loss": 0.2736, "lr": 2.5907320518332827e-05, "epoch": 2.3168, "percentage": 46.41, "elapsed_time": "4:11:10", "remaining_time": "4:50:01"}
182
+ {"current_steps": 182, "total_steps": 390, "loss": 0.2701, "lr": 2.5736064654221808e-05, "epoch": 2.3296, "percentage": 46.67, "elapsed_time": "4:12:30", "remaining_time": "4:48:34"}
183
+ {"current_steps": 183, "total_steps": 390, "loss": 0.2751, "lr": 2.5564349278329056e-05, "epoch": 2.3424, "percentage": 46.92, "elapsed_time": "4:13:51", "remaining_time": "4:47:09"}
184
+ {"current_steps": 184, "total_steps": 390, "loss": 0.2819, "lr": 2.539218814664288e-05, "epoch": 2.3552, "percentage": 47.18, "elapsed_time": "4:15:21", "remaining_time": "4:45:52"}
185
+ {"current_steps": 185, "total_steps": 390, "loss": 0.2711, "lr": 2.521959505086075e-05, "epoch": 2.368, "percentage": 47.44, "elapsed_time": "4:16:36", "remaining_time": "4:44:21"}
186
+ {"current_steps": 186, "total_steps": 390, "loss": 0.2708, "lr": 2.5046583817284437e-05, "epoch": 2.3808, "percentage": 47.69, "elapsed_time": "4:18:07", "remaining_time": "4:43:05"}
187
+ {"current_steps": 187, "total_steps": 390, "loss": 0.268, "lr": 2.487316830571244e-05, "epoch": 2.3936, "percentage": 47.95, "elapsed_time": "4:19:25", "remaining_time": "4:41:36"}
188
+ {"current_steps": 188, "total_steps": 390, "loss": 0.271, "lr": 2.4699362408329646e-05, "epoch": 2.4064, "percentage": 48.21, "elapsed_time": "4:20:40", "remaining_time": "4:40:05"}
189
+ {"current_steps": 189, "total_steps": 390, "loss": 0.2658, "lr": 2.4525180048594452e-05, "epoch": 2.4192, "percentage": 48.46, "elapsed_time": "4:22:08", "remaining_time": "4:38:47"}
190
+ {"current_steps": 190, "total_steps": 390, "loss": 0.2507, "lr": 2.435063518012335e-05, "epoch": 2.432, "percentage": 48.72, "elapsed_time": "4:23:24", "remaining_time": "4:37:16"}
191
+ {"current_steps": 191, "total_steps": 390, "loss": 0.269, "lr": 2.4175741785573177e-05, "epoch": 2.4448, "percentage": 48.97, "elapsed_time": "4:24:50", "remaining_time": "4:35:55"}
192
+ {"current_steps": 192, "total_steps": 390, "loss": 0.2656, "lr": 2.4000513875520892e-05, "epoch": 2.4576000000000002, "percentage": 49.23, "elapsed_time": "4:26:18", "remaining_time": "4:34:37"}
193
+ {"current_steps": 193, "total_steps": 390, "loss": 0.2488, "lr": 2.3824965487341247e-05, "epoch": 2.4704, "percentage": 49.49, "elapsed_time": "4:27:28", "remaining_time": "4:33:01"}
194
+ {"current_steps": 194, "total_steps": 390, "loss": 0.2519, "lr": 2.3649110684082258e-05, "epoch": 2.4832, "percentage": 49.74, "elapsed_time": "4:28:43", "remaining_time": "4:31:29"}
195
+ {"current_steps": 195, "total_steps": 390, "loss": 0.2661, "lr": 2.3472963553338614e-05, "epoch": 2.496, "percentage": 50.0, "elapsed_time": "4:30:17", "remaining_time": "4:30:17"}
196
+ {"current_steps": 196, "total_steps": 390, "loss": 0.2788, "lr": 2.3296538206123134e-05, "epoch": 2.5088, "percentage": 50.26, "elapsed_time": "4:31:40", "remaining_time": "4:28:53"}
197
+ {"current_steps": 197, "total_steps": 390, "loss": 0.2999, "lr": 2.311984877573636e-05, "epoch": 2.5216, "percentage": 50.51, "elapsed_time": "4:33:11", "remaining_time": "4:27:38"}
198
+ {"current_steps": 198, "total_steps": 390, "loss": 0.2509, "lr": 2.2942909416634326e-05, "epoch": 2.5343999999999998, "percentage": 50.77, "elapsed_time": "4:34:22", "remaining_time": "4:26:03"}
199
+ {"current_steps": 199, "total_steps": 390, "loss": 0.2765, "lr": 2.2765734303294666e-05, "epoch": 2.5472, "percentage": 51.03, "elapsed_time": "4:35:58", "remaining_time": "4:24:53"}
200
+ {"current_steps": 200, "total_steps": 390, "loss": 0.2331, "lr": 2.2588337629081107e-05, "epoch": 2.56, "percentage": 51.28, "elapsed_time": "4:37:10", "remaining_time": "4:23:19"}
201
+ {"current_steps": 201, "total_steps": 390, "loss": 0.2693, "lr": 2.2410733605106462e-05, "epoch": 2.5728, "percentage": 51.54, "elapsed_time": "4:38:34", "remaining_time": "4:21:56"}
202
+ {"current_steps": 202, "total_steps": 390, "loss": 0.2634, "lr": 2.2232936459094158e-05, "epoch": 2.5856, "percentage": 51.79, "elapsed_time": "4:39:51", "remaining_time": "4:20:27"}
203
+ {"current_steps": 203, "total_steps": 390, "loss": 0.2406, "lr": 2.205496043423849e-05, "epoch": 2.5984, "percentage": 52.05, "elapsed_time": "4:40:58", "remaining_time": "4:18:49"}
204
+ {"current_steps": 204, "total_steps": 390, "loss": 0.2685, "lr": 2.1876819788063586e-05, "epoch": 2.6112, "percentage": 52.31, "elapsed_time": "4:42:20", "remaining_time": "4:17:25"}
205
+ {"current_steps": 205, "total_steps": 390, "loss": 0.2647, "lr": 2.16985287912813e-05, "epoch": 2.624, "percentage": 52.56, "elapsed_time": "4:43:35", "remaining_time": "4:15:55"}
206
+ {"current_steps": 206, "total_steps": 390, "loss": 0.2717, "lr": 2.1520101726647922e-05, "epoch": 2.6368, "percentage": 52.82, "elapsed_time": "4:44:46", "remaining_time": "4:14:21"}
207
+ {"current_steps": 207, "total_steps": 390, "loss": 0.255, "lr": 2.1341552887820048e-05, "epoch": 2.6496, "percentage": 53.08, "elapsed_time": "4:46:07", "remaining_time": "4:12:56"}
208
+ {"current_steps": 208, "total_steps": 390, "loss": 0.2864, "lr": 2.1162896578209517e-05, "epoch": 2.6624, "percentage": 53.33, "elapsed_time": "4:47:28", "remaining_time": "4:11:32"}
209
+ {"current_steps": 209, "total_steps": 390, "loss": 0.2685, "lr": 2.0984147109837564e-05, "epoch": 2.6752000000000002, "percentage": 53.59, "elapsed_time": "4:48:44", "remaining_time": "4:10:03"}
210
+ {"current_steps": 210, "total_steps": 390, "loss": 0.2719, "lr": 2.0805318802188307e-05, "epoch": 2.6879999999999997, "percentage": 53.85, "elapsed_time": "4:50:04", "remaining_time": "4:08:37"}
211
+ {"current_steps": 211, "total_steps": 390, "loss": 0.2717, "lr": 2.0626425981061608e-05, "epoch": 2.7008, "percentage": 54.1, "elapsed_time": "4:51:25", "remaining_time": "4:07:13"}
212
+ {"current_steps": 212, "total_steps": 390, "loss": 0.2765, "lr": 2.0447482977425465e-05, "epoch": 2.7136, "percentage": 54.36, "elapsed_time": "4:52:56", "remaining_time": "4:05:57"}
213
+ {"current_steps": 213, "total_steps": 390, "loss": 0.2786, "lr": 2.0268504126267952e-05, "epoch": 2.7264, "percentage": 54.62, "elapsed_time": "4:54:28", "remaining_time": "4:04:42"}
214
+ {"current_steps": 214, "total_steps": 390, "loss": 0.2471, "lr": 2.008950376544887e-05, "epoch": 2.7392, "percentage": 54.87, "elapsed_time": "4:55:56", "remaining_time": "4:03:23"}
215
+ {"current_steps": 215, "total_steps": 390, "loss": 0.2575, "lr": 1.9910496234551132e-05, "epoch": 2.752, "percentage": 55.13, "elapsed_time": "4:57:19", "remaining_time": "4:02:00"}
216
+ {"current_steps": 216, "total_steps": 390, "loss": 0.2744, "lr": 1.9731495873732055e-05, "epoch": 2.7648, "percentage": 55.38, "elapsed_time": "4:58:35", "remaining_time": "4:00:32"}
217
+ {"current_steps": 217, "total_steps": 390, "loss": 0.2586, "lr": 1.9552517022574542e-05, "epoch": 2.7776, "percentage": 55.64, "elapsed_time": "5:00:01", "remaining_time": "3:59:11"}
218
+ {"current_steps": 218, "total_steps": 390, "loss": 0.2627, "lr": 1.93735740189384e-05, "epoch": 2.7904, "percentage": 55.9, "elapsed_time": "5:01:25", "remaining_time": "3:57:49"}
219
+ {"current_steps": 219, "total_steps": 390, "loss": 0.2906, "lr": 1.9194681197811703e-05, "epoch": 2.8032, "percentage": 56.15, "elapsed_time": "5:02:41", "remaining_time": "3:56:20"}
220
+ {"current_steps": 220, "total_steps": 390, "loss": 0.2622, "lr": 1.901585289016244e-05, "epoch": 2.816, "percentage": 56.41, "elapsed_time": "5:03:49", "remaining_time": "3:54:46"}
221
+ {"current_steps": 221, "total_steps": 390, "loss": 0.2495, "lr": 1.8837103421790486e-05, "epoch": 2.8288, "percentage": 56.67, "elapsed_time": "5:04:57", "remaining_time": "3:53:12"}
222
+ {"current_steps": 222, "total_steps": 390, "loss": 0.2746, "lr": 1.8658447112179952e-05, "epoch": 2.8416, "percentage": 56.92, "elapsed_time": "5:06:25", "remaining_time": "3:51:53"}
223
+ {"current_steps": 223, "total_steps": 390, "loss": 0.2687, "lr": 1.8479898273352084e-05, "epoch": 2.8544, "percentage": 57.18, "elapsed_time": "5:07:48", "remaining_time": "3:50:30"}
224
+ {"current_steps": 224, "total_steps": 390, "loss": 0.2727, "lr": 1.83014712087187e-05, "epoch": 2.8672, "percentage": 57.44, "elapsed_time": "5:09:12", "remaining_time": "3:49:08"}
225
+ {"current_steps": 225, "total_steps": 390, "loss": 0.2548, "lr": 1.8123180211936417e-05, "epoch": 2.88, "percentage": 57.69, "elapsed_time": "5:10:36", "remaining_time": "3:47:46"}
226
+ {"current_steps": 226, "total_steps": 390, "loss": 0.25, "lr": 1.794503956576152e-05, "epoch": 2.8928000000000003, "percentage": 57.95, "elapsed_time": "5:11:43", "remaining_time": "3:46:12"}
227
+ {"current_steps": 227, "total_steps": 390, "loss": 0.2476, "lr": 1.776706354090585e-05, "epoch": 2.9055999999999997, "percentage": 58.21, "elapsed_time": "5:13:10", "remaining_time": "3:44:52"}
228
+ {"current_steps": 228, "total_steps": 390, "loss": 0.2792, "lr": 1.758926639489354e-05, "epoch": 2.9184, "percentage": 58.46, "elapsed_time": "5:14:29", "remaining_time": "3:43:27"}
229
+ {"current_steps": 229, "total_steps": 390, "loss": 0.2508, "lr": 1.7411662370918893e-05, "epoch": 2.9312, "percentage": 58.72, "elapsed_time": "5:15:51", "remaining_time": "3:42:03"}
230
+ {"current_steps": 230, "total_steps": 390, "loss": 0.2281, "lr": 1.7234265696705344e-05, "epoch": 2.944, "percentage": 58.97, "elapsed_time": "5:16:53", "remaining_time": "3:40:26"}
231
+ {"current_steps": 231, "total_steps": 390, "loss": 0.2884, "lr": 1.7057090583365678e-05, "epoch": 2.9568, "percentage": 59.23, "elapsed_time": "5:18:33", "remaining_time": "3:39:16"}
232
+ {"current_steps": 232, "total_steps": 390, "loss": 0.2776, "lr": 1.6880151224263646e-05, "epoch": 2.9696, "percentage": 59.49, "elapsed_time": "5:20:10", "remaining_time": "3:38:02"}
233
+ {"current_steps": 233, "total_steps": 390, "loss": 0.264, "lr": 1.6703461793876876e-05, "epoch": 2.9824, "percentage": 59.74, "elapsed_time": "5:21:25", "remaining_time": "3:36:35"}
234
+ {"current_steps": 234, "total_steps": 390, "loss": 0.2781, "lr": 1.6527036446661396e-05, "epoch": 2.9952, "percentage": 60.0, "elapsed_time": "5:22:59", "remaining_time": "3:35:19"}
235
+ {"current_steps": 235, "total_steps": 390, "loss": 0.1995, "lr": 1.635088931591775e-05, "epoch": 3.008, "percentage": 60.26, "elapsed_time": "5:25:30", "remaining_time": "3:34:41"}
236
+ {"current_steps": 236, "total_steps": 390, "loss": 0.1647, "lr": 1.6175034512658753e-05, "epoch": 3.0208, "percentage": 60.51, "elapsed_time": "5:26:40", "remaining_time": "3:33:09"}
237
+ {"current_steps": 237, "total_steps": 390, "loss": 0.2045, "lr": 1.5999486124479115e-05, "epoch": 3.0336, "percentage": 60.77, "elapsed_time": "5:28:02", "remaining_time": "3:31:46"}
238
+ {"current_steps": 238, "total_steps": 390, "loss": 0.1701, "lr": 1.5824258214426833e-05, "epoch": 3.0464, "percentage": 61.03, "elapsed_time": "5:29:20", "remaining_time": "3:30:19"}
239
+ {"current_steps": 239, "total_steps": 390, "loss": 0.1856, "lr": 1.5649364819876655e-05, "epoch": 3.0592, "percentage": 61.28, "elapsed_time": "5:30:39", "remaining_time": "3:28:54"}