Training in progress, step 19240
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +192 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1638528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:02e957005f24585ac4b202b6b1e54cb9fedb199d2039574c11cb95309e3aa222
|
| 3 |
size 1638528
|
trainer_log.jsonl
CHANGED
|
@@ -3675,3 +3675,195 @@
|
|
| 3675 |
{"current_steps": 18280, "total_steps": 19240, "loss": 0.0045, "lr": 3.7901896751541545e-07, "epoch": 9.5010395010395, "percentage": 95.01, "elapsed_time": "0:50:08", "remaining_time": "0:02:38", "throughput": 1158.57, "total_tokens": 3485728}
|
| 3676 |
{"current_steps": 18285, "total_steps": 19240, "loss": 0.0304, "lr": 3.750950884300108e-07, "epoch": 9.503638253638254, "percentage": 95.04, "elapsed_time": "0:50:09", "remaining_time": "0:02:37", "throughput": 1158.61, "total_tokens": 3486720}
|
| 3677 |
{"current_steps": 18290, "total_steps": 19240, "loss": 0.0765, "lr": 3.71191473062571e-07, "epoch": 9.506237006237006, "percentage": 95.06, "elapsed_time": "0:50:10", "remaining_time": "0:02:36", "throughput": 1158.65, "total_tokens": 3487680}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3675 |
{"current_steps": 18280, "total_steps": 19240, "loss": 0.0045, "lr": 3.7901896751541545e-07, "epoch": 9.5010395010395, "percentage": 95.01, "elapsed_time": "0:50:08", "remaining_time": "0:02:38", "throughput": 1158.57, "total_tokens": 3485728}
|
| 3676 |
{"current_steps": 18285, "total_steps": 19240, "loss": 0.0304, "lr": 3.750950884300108e-07, "epoch": 9.503638253638254, "percentage": 95.04, "elapsed_time": "0:50:09", "remaining_time": "0:02:37", "throughput": 1158.61, "total_tokens": 3486720}
|
| 3677 |
{"current_steps": 18290, "total_steps": 19240, "loss": 0.0765, "lr": 3.71191473062571e-07, "epoch": 9.506237006237006, "percentage": 95.06, "elapsed_time": "0:50:10", "remaining_time": "0:02:36", "throughput": 1158.65, "total_tokens": 3487680}
|
| 3678 |
+
{"current_steps": 18295, "total_steps": 19240, "loss": 0.0784, "lr": 3.6730812462535404e-07, "epoch": 9.508835758835758, "percentage": 95.09, "elapsed_time": "0:50:10", "remaining_time": "0:02:35", "throughput": 1158.68, "total_tokens": 3488640}
|
| 3679 |
+
{"current_steps": 18300, "total_steps": 19240, "loss": 0.2528, "lr": 3.6344504631395934e-07, "epoch": 9.511434511434512, "percentage": 95.11, "elapsed_time": "0:50:11", "remaining_time": "0:02:34", "throughput": 1158.72, "total_tokens": 3489632}
|
| 3680 |
+
{"current_steps": 18305, "total_steps": 19240, "loss": 0.1525, "lr": 3.5960224130728857e-07, "epoch": 9.514033264033264, "percentage": 95.14, "elapsed_time": "0:50:12", "remaining_time": "0:02:33", "throughput": 1158.74, "total_tokens": 3490528}
|
| 3681 |
+
{"current_steps": 18310, "total_steps": 19240, "loss": 0.1942, "lr": 3.5577971276757325e-07, "epoch": 9.516632016632016, "percentage": 95.17, "elapsed_time": "0:50:13", "remaining_time": "0:02:33", "throughput": 1158.75, "total_tokens": 3491424}
|
| 3682 |
+
{"current_steps": 18315, "total_steps": 19240, "loss": 0.0919, "lr": 3.519774638403472e-07, "epoch": 9.51923076923077, "percentage": 95.19, "elapsed_time": "0:50:13", "remaining_time": "0:02:32", "throughput": 1158.74, "total_tokens": 3492256}
|
| 3683 |
+
{"current_steps": 18320, "total_steps": 19240, "loss": 0.1503, "lr": 3.481954976544716e-07, "epoch": 9.521829521829522, "percentage": 95.22, "elapsed_time": "0:50:14", "remaining_time": "0:02:31", "throughput": 1158.76, "total_tokens": 3493152}
|
| 3684 |
+
{"current_steps": 18325, "total_steps": 19240, "loss": 0.0798, "lr": 3.44433817322104e-07, "epoch": 9.524428274428274, "percentage": 95.24, "elapsed_time": "0:50:15", "remaining_time": "0:02:30", "throughput": 1158.77, "total_tokens": 3494048}
|
| 3685 |
+
{"current_steps": 18330, "total_steps": 19240, "loss": 0.2301, "lr": 3.406924259387101e-07, "epoch": 9.527027027027026, "percentage": 95.27, "elapsed_time": "0:50:16", "remaining_time": "0:02:29", "throughput": 1158.82, "total_tokens": 3495040}
|
| 3686 |
+
{"current_steps": 18335, "total_steps": 19240, "loss": 0.0228, "lr": 3.369713265830715e-07, "epoch": 9.52962577962578, "percentage": 95.3, "elapsed_time": "0:50:16", "remaining_time": "0:02:28", "throughput": 1158.86, "total_tokens": 3496064}
|
| 3687 |
+
{"current_steps": 18340, "total_steps": 19240, "loss": 0.0178, "lr": 3.3327052231725276e-07, "epoch": 9.532224532224532, "percentage": 95.32, "elapsed_time": "0:50:17", "remaining_time": "0:02:28", "throughput": 1158.89, "total_tokens": 3496992}
|
| 3688 |
+
{"current_steps": 18345, "total_steps": 19240, "loss": 0.0482, "lr": 3.2959001618664e-07, "epoch": 9.534823284823284, "percentage": 95.35, "elapsed_time": "0:50:18", "remaining_time": "0:02:27", "throughput": 1158.92, "total_tokens": 3497952}
|
| 3689 |
+
{"current_steps": 18350, "total_steps": 19240, "loss": 0.0788, "lr": 3.2592981121989384e-07, "epoch": 9.537422037422038, "percentage": 95.37, "elapsed_time": "0:50:19", "remaining_time": "0:02:26", "throughput": 1158.95, "total_tokens": 3498912}
|
| 3690 |
+
{"current_steps": 18355, "total_steps": 19240, "loss": 0.1904, "lr": 3.222899104289856e-07, "epoch": 9.54002079002079, "percentage": 95.4, "elapsed_time": "0:50:19", "remaining_time": "0:02:25", "throughput": 1158.97, "total_tokens": 3499840}
|
| 3691 |
+
{"current_steps": 18360, "total_steps": 19240, "loss": 0.107, "lr": 3.18670316809172e-07, "epoch": 9.542619542619542, "percentage": 95.43, "elapsed_time": "0:50:20", "remaining_time": "0:02:24", "throughput": 1159.0, "total_tokens": 3500768}
|
| 3692 |
+
{"current_steps": 18365, "total_steps": 19240, "loss": 0.0179, "lr": 3.150710333389983e-07, "epoch": 9.545218295218294, "percentage": 95.45, "elapsed_time": "0:50:21", "remaining_time": "0:02:23", "throughput": 1159.04, "total_tokens": 3501760}
|
| 3693 |
+
{"current_steps": 18370, "total_steps": 19240, "loss": 0.3389, "lr": 3.114920629802981e-07, "epoch": 9.547817047817048, "percentage": 95.48, "elapsed_time": "0:50:22", "remaining_time": "0:02:23", "throughput": 1159.09, "total_tokens": 3502784}
|
| 3694 |
+
{"current_steps": 18375, "total_steps": 19240, "loss": 0.0141, "lr": 3.0793340867818763e-07, "epoch": 9.5504158004158, "percentage": 95.5, "elapsed_time": "0:50:22", "remaining_time": "0:02:22", "throughput": 1159.1, "total_tokens": 3503680}
|
| 3695 |
+
{"current_steps": 18380, "total_steps": 19240, "loss": 0.0768, "lr": 3.04395073361069e-07, "epoch": 9.553014553014552, "percentage": 95.53, "elapsed_time": "0:50:23", "remaining_time": "0:02:21", "throughput": 1159.14, "total_tokens": 3504640}
|
| 3696 |
+
{"current_steps": 18385, "total_steps": 19240, "loss": 0.0963, "lr": 3.008770599406213e-07, "epoch": 9.555613305613306, "percentage": 95.56, "elapsed_time": "0:50:24", "remaining_time": "0:02:20", "throughput": 1159.14, "total_tokens": 3505504}
|
| 3697 |
+
{"current_steps": 18390, "total_steps": 19240, "loss": 0.1558, "lr": 2.973793713118039e-07, "epoch": 9.558212058212058, "percentage": 95.58, "elapsed_time": "0:50:24", "remaining_time": "0:02:19", "throughput": 1159.16, "total_tokens": 3506432}
|
| 3698 |
+
{"current_steps": 18395, "total_steps": 19240, "loss": 0.2122, "lr": 2.9390201035284226e-07, "epoch": 9.56081081081081, "percentage": 95.61, "elapsed_time": "0:50:25", "remaining_time": "0:02:18", "throughput": 1159.17, "total_tokens": 3507328}
|
| 3699 |
+
{"current_steps": 18400, "total_steps": 19240, "loss": 0.0048, "lr": 2.904449799252418e-07, "epoch": 9.563409563409563, "percentage": 95.63, "elapsed_time": "0:50:26", "remaining_time": "0:02:18", "throughput": 1159.18, "total_tokens": 3508192}
|
| 3700 |
+
{"current_steps": 18405, "total_steps": 19240, "loss": 0.0051, "lr": 2.870082828737797e-07, "epoch": 9.566008316008316, "percentage": 95.66, "elapsed_time": "0:50:27", "remaining_time": "0:02:17", "throughput": 1159.19, "total_tokens": 3509088}
|
| 3701 |
+
{"current_steps": 18410, "total_steps": 19240, "loss": 0.0079, "lr": 2.8359192202649376e-07, "epoch": 9.568607068607069, "percentage": 95.69, "elapsed_time": "0:50:27", "remaining_time": "0:02:16", "throughput": 1159.25, "total_tokens": 3510112}
|
| 3702 |
+
{"current_steps": 18415, "total_steps": 19240, "loss": 0.1856, "lr": 2.8019590019469633e-07, "epoch": 9.57120582120582, "percentage": 95.71, "elapsed_time": "0:50:28", "remaining_time": "0:02:15", "throughput": 1159.25, "total_tokens": 3510976}
|
| 3703 |
+
{"current_steps": 18420, "total_steps": 19240, "loss": 0.0065, "lr": 2.7682022017295197e-07, "epoch": 9.573804573804575, "percentage": 95.74, "elapsed_time": "0:50:29", "remaining_time": "0:02:14", "throughput": 1159.28, "total_tokens": 3511936}
|
| 3704 |
+
{"current_steps": 18425, "total_steps": 19240, "loss": 0.0047, "lr": 2.734648847390997e-07, "epoch": 9.576403326403327, "percentage": 95.76, "elapsed_time": "0:50:30", "remaining_time": "0:02:14", "throughput": 1159.32, "total_tokens": 3512896}
|
| 3705 |
+
{"current_steps": 18430, "total_steps": 19240, "loss": 0.1324, "lr": 2.7012989665421706e-07, "epoch": 9.579002079002079, "percentage": 95.79, "elapsed_time": "0:50:30", "remaining_time": "0:02:13", "throughput": 1159.33, "total_tokens": 3513792}
|
| 3706 |
+
{"current_steps": 18435, "total_steps": 19240, "loss": 0.0109, "lr": 2.6681525866266157e-07, "epoch": 9.58160083160083, "percentage": 95.82, "elapsed_time": "0:50:31", "remaining_time": "0:02:12", "throughput": 1159.39, "total_tokens": 3514816}
|
| 3707 |
+
{"current_steps": 18440, "total_steps": 19240, "loss": 0.19, "lr": 2.635209734920291e-07, "epoch": 9.584199584199585, "percentage": 95.84, "elapsed_time": "0:50:32", "remaining_time": "0:02:11", "throughput": 1159.44, "total_tokens": 3515840}
|
| 3708 |
+
{"current_steps": 18445, "total_steps": 19240, "loss": 0.1299, "lr": 2.602470438531679e-07, "epoch": 9.586798336798337, "percentage": 95.87, "elapsed_time": "0:50:33", "remaining_time": "0:02:10", "throughput": 1159.44, "total_tokens": 3516704}
|
| 3709 |
+
{"current_steps": 18450, "total_steps": 19240, "loss": 0.0279, "lr": 2.5699347244018404e-07, "epoch": 9.589397089397089, "percentage": 95.89, "elapsed_time": "0:50:33", "remaining_time": "0:02:09", "throughput": 1159.48, "total_tokens": 3517664}
|
| 3710 |
+
{"current_steps": 18455, "total_steps": 19240, "loss": 0.0636, "lr": 2.537602619304247e-07, "epoch": 9.591995841995843, "percentage": 95.92, "elapsed_time": "0:50:34", "remaining_time": "0:02:09", "throughput": 1159.53, "total_tokens": 3518688}
|
| 3711 |
+
{"current_steps": 18460, "total_steps": 19240, "loss": 0.0833, "lr": 2.5054741498448386e-07, "epoch": 9.594594594594595, "percentage": 95.95, "elapsed_time": "0:50:35", "remaining_time": "0:02:08", "throughput": 1159.56, "total_tokens": 3519648}
|
| 3712 |
+
{"current_steps": 18465, "total_steps": 19240, "loss": 0.159, "lr": 2.4735493424619394e-07, "epoch": 9.597193347193347, "percentage": 95.97, "elapsed_time": "0:50:36", "remaining_time": "0:02:07", "throughput": 1159.59, "total_tokens": 3520576}
|
| 3713 |
+
{"current_steps": 18470, "total_steps": 19240, "loss": 0.114, "lr": 2.4418282234263957e-07, "epoch": 9.5997920997921, "percentage": 96.0, "elapsed_time": "0:50:36", "remaining_time": "0:02:06", "throughput": 1159.62, "total_tokens": 3521536}
|
| 3714 |
+
{"current_steps": 18475, "total_steps": 19240, "loss": 0.1238, "lr": 2.410310818841299e-07, "epoch": 9.602390852390853, "percentage": 96.02, "elapsed_time": "0:50:37", "remaining_time": "0:02:05", "throughput": 1159.66, "total_tokens": 3522528}
|
| 3715 |
+
{"current_steps": 18480, "total_steps": 19240, "loss": 0.0283, "lr": 2.3789971546422374e-07, "epoch": 9.604989604989605, "percentage": 96.05, "elapsed_time": "0:50:38", "remaining_time": "0:02:04", "throughput": 1159.7, "total_tokens": 3523520}
|
| 3716 |
+
{"current_steps": 18485, "total_steps": 19240, "loss": 0.0968, "lr": 2.3478872565969867e-07, "epoch": 9.607588357588357, "percentage": 96.08, "elapsed_time": "0:50:39", "remaining_time": "0:02:04", "throughput": 1159.74, "total_tokens": 3524480}
|
| 3717 |
+
{"current_steps": 18490, "total_steps": 19240, "loss": 0.231, "lr": 2.316981150305847e-07, "epoch": 9.61018711018711, "percentage": 96.1, "elapsed_time": "0:50:39", "remaining_time": "0:02:03", "throughput": 1159.76, "total_tokens": 3525408}
|
| 3718 |
+
{"current_steps": 18495, "total_steps": 19240, "loss": 0.0106, "lr": 2.2862788612012244e-07, "epoch": 9.612785862785863, "percentage": 96.13, "elapsed_time": "0:50:40", "remaining_time": "0:02:02", "throughput": 1159.77, "total_tokens": 3526304}
|
| 3719 |
+
{"current_steps": 18500, "total_steps": 19240, "loss": 0.0505, "lr": 2.255780414547909e-07, "epoch": 9.615384615384615, "percentage": 96.15, "elapsed_time": "0:50:41", "remaining_time": "0:02:01", "throughput": 1159.8, "total_tokens": 3527232}
|
| 3720 |
+
{"current_steps": 18505, "total_steps": 19240, "loss": 0.2529, "lr": 2.2254858354429364e-07, "epoch": 9.617983367983367, "percentage": 96.18, "elapsed_time": "0:50:41", "remaining_time": "0:02:00", "throughput": 1159.84, "total_tokens": 3528224}
|
| 3721 |
+
{"current_steps": 18510, "total_steps": 19240, "loss": 0.0323, "lr": 2.19539514881556e-07, "epoch": 9.620582120582121, "percentage": 96.21, "elapsed_time": "0:50:42", "remaining_time": "0:01:59", "throughput": 1159.85, "total_tokens": 3529120}
|
| 3722 |
+
{"current_steps": 18515, "total_steps": 19240, "loss": 0.0377, "lr": 2.165508379427278e-07, "epoch": 9.623180873180873, "percentage": 96.23, "elapsed_time": "0:50:43", "remaining_time": "0:01:59", "throughput": 1159.88, "total_tokens": 3530048}
|
| 3723 |
+
{"current_steps": 18520, "total_steps": 19240, "loss": 0.022, "lr": 2.1358255518717786e-07, "epoch": 9.625779625779625, "percentage": 96.26, "elapsed_time": "0:50:44", "remaining_time": "0:01:58", "throughput": 1159.92, "total_tokens": 3531040}
|
| 3724 |
+
{"current_steps": 18525, "total_steps": 19240, "loss": 0.0526, "lr": 2.106346690574912e-07, "epoch": 9.628378378378379, "percentage": 96.28, "elapsed_time": "0:50:44", "remaining_time": "0:01:57", "throughput": 1159.98, "total_tokens": 3532096}
|
| 3725 |
+
{"current_steps": 18530, "total_steps": 19240, "loss": 0.1088, "lr": 2.0770718197946625e-07, "epoch": 9.630977130977131, "percentage": 96.31, "elapsed_time": "0:50:45", "remaining_time": "0:01:56", "throughput": 1160.03, "total_tokens": 3533088}
|
| 3726 |
+
{"current_steps": 18535, "total_steps": 19240, "loss": 0.0051, "lr": 2.0480009636212327e-07, "epoch": 9.633575883575883, "percentage": 96.34, "elapsed_time": "0:50:46", "remaining_time": "0:01:55", "throughput": 1160.07, "total_tokens": 3534080}
|
| 3727 |
+
{"current_steps": 18540, "total_steps": 19240, "loss": 0.0566, "lr": 2.0191341459768475e-07, "epoch": 9.636174636174637, "percentage": 96.36, "elapsed_time": "0:50:47", "remaining_time": "0:01:55", "throughput": 1160.08, "total_tokens": 3534976}
|
| 3728 |
+
{"current_steps": 18545, "total_steps": 19240, "loss": 0.0713, "lr": 1.9904713906159224e-07, "epoch": 9.638773388773389, "percentage": 96.39, "elapsed_time": "0:50:47", "remaining_time": "0:01:54", "throughput": 1160.12, "total_tokens": 3535936}
|
| 3729 |
+
{"current_steps": 18550, "total_steps": 19240, "loss": 0.2084, "lr": 1.9620127211248672e-07, "epoch": 9.641372141372141, "percentage": 96.41, "elapsed_time": "0:50:48", "remaining_time": "0:01:53", "throughput": 1160.15, "total_tokens": 3536896}
|
| 3730 |
+
{"current_steps": 18555, "total_steps": 19240, "loss": 0.0292, "lr": 1.9337581609222277e-07, "epoch": 9.643970893970893, "percentage": 96.44, "elapsed_time": "0:50:49", "remaining_time": "0:01:52", "throughput": 1160.18, "total_tokens": 3537856}
|
| 3731 |
+
{"current_steps": 18560, "total_steps": 19240, "loss": 0.2974, "lr": 1.9057077332584883e-07, "epoch": 9.646569646569647, "percentage": 96.47, "elapsed_time": "0:50:50", "remaining_time": "0:01:51", "throughput": 1160.19, "total_tokens": 3538752}
|
| 3732 |
+
{"current_steps": 18565, "total_steps": 19240, "loss": 0.1961, "lr": 1.8778614612162404e-07, "epoch": 9.6491683991684, "percentage": 96.49, "elapsed_time": "0:50:50", "remaining_time": "0:01:50", "throughput": 1160.24, "total_tokens": 3539744}
|
| 3733 |
+
{"current_steps": 18570, "total_steps": 19240, "loss": 0.0066, "lr": 1.850219367710071e-07, "epoch": 9.651767151767151, "percentage": 96.52, "elapsed_time": "0:50:51", "remaining_time": "0:01:50", "throughput": 1160.25, "total_tokens": 3540640}
|
| 3734 |
+
{"current_steps": 18575, "total_steps": 19240, "loss": 0.0336, "lr": 1.8227814754865068e-07, "epoch": 9.654365904365905, "percentage": 96.54, "elapsed_time": "0:50:52", "remaining_time": "0:01:49", "throughput": 1160.26, "total_tokens": 3541536}
|
| 3735 |
+
{"current_steps": 18580, "total_steps": 19240, "loss": 0.0503, "lr": 1.7955478071240706e-07, "epoch": 9.656964656964657, "percentage": 96.57, "elapsed_time": "0:50:53", "remaining_time": "0:01:48", "throughput": 1160.28, "total_tokens": 3542432}
|
| 3736 |
+
{"current_steps": 18585, "total_steps": 19240, "loss": 0.0568, "lr": 1.7685183850331965e-07, "epoch": 9.65956340956341, "percentage": 96.6, "elapsed_time": "0:50:53", "remaining_time": "0:01:47", "throughput": 1160.31, "total_tokens": 3543392}
|
| 3737 |
+
{"current_steps": 18590, "total_steps": 19240, "loss": 0.2099, "lr": 1.7416932314562872e-07, "epoch": 9.662162162162161, "percentage": 96.62, "elapsed_time": "0:50:54", "remaining_time": "0:01:46", "throughput": 1160.35, "total_tokens": 3544352}
|
| 3738 |
+
{"current_steps": 18595, "total_steps": 19240, "loss": 0.2469, "lr": 1.7150723684676572e-07, "epoch": 9.664760914760915, "percentage": 96.65, "elapsed_time": "0:50:55", "remaining_time": "0:01:45", "throughput": 1160.37, "total_tokens": 3545280}
|
| 3739 |
+
{"current_steps": 18600, "total_steps": 19240, "loss": 0.1279, "lr": 1.6886558179734225e-07, "epoch": 9.667359667359667, "percentage": 96.67, "elapsed_time": "0:50:56", "remaining_time": "0:01:45", "throughput": 1160.4, "total_tokens": 3546240}
|
| 3740 |
+
{"current_steps": 18605, "total_steps": 19240, "loss": 0.0692, "lr": 1.662443601711694e-07, "epoch": 9.66995841995842, "percentage": 96.7, "elapsed_time": "0:50:56", "remaining_time": "0:01:44", "throughput": 1160.43, "total_tokens": 3547200}
|
| 3741 |
+
{"current_steps": 18610, "total_steps": 19240, "loss": 0.066, "lr": 1.6364357412523845e-07, "epoch": 9.672557172557173, "percentage": 96.73, "elapsed_time": "0:50:57", "remaining_time": "0:01:43", "throughput": 1160.48, "total_tokens": 3548224}
|
| 3742 |
+
{"current_steps": 18615, "total_steps": 19240, "loss": 0.1939, "lr": 1.6106322579972077e-07, "epoch": 9.675155925155925, "percentage": 96.75, "elapsed_time": "0:50:58", "remaining_time": "0:01:42", "throughput": 1160.52, "total_tokens": 3549216}
|
| 3743 |
+
{"current_steps": 18620, "total_steps": 19240, "loss": 0.008, "lr": 1.585033173179734e-07, "epoch": 9.677754677754677, "percentage": 96.78, "elapsed_time": "0:50:59", "remaining_time": "0:01:41", "throughput": 1160.57, "total_tokens": 3550208}
|
| 3744 |
+
{"current_steps": 18625, "total_steps": 19240, "loss": 0.0109, "lr": 1.5596385078653353e-07, "epoch": 9.68035343035343, "percentage": 96.8, "elapsed_time": "0:50:59", "remaining_time": "0:01:41", "throughput": 1160.6, "total_tokens": 3551168}
|
| 3745 |
+
{"current_steps": 18630, "total_steps": 19240, "loss": 0.0072, "lr": 1.5344482829511842e-07, "epoch": 9.682952182952183, "percentage": 96.83, "elapsed_time": "0:51:00", "remaining_time": "0:01:40", "throughput": 1160.64, "total_tokens": 3552160}
|
| 3746 |
+
{"current_steps": 18635, "total_steps": 19240, "loss": 0.1374, "lr": 1.5094625191661715e-07, "epoch": 9.685550935550935, "percentage": 96.86, "elapsed_time": "0:51:01", "remaining_time": "0:01:39", "throughput": 1160.68, "total_tokens": 3553152}
|
| 3747 |
+
{"current_steps": 18640, "total_steps": 19240, "loss": 0.0371, "lr": 1.4846812370709617e-07, "epoch": 9.688149688149688, "percentage": 96.88, "elapsed_time": "0:51:01", "remaining_time": "0:01:38", "throughput": 1160.71, "total_tokens": 3554080}
|
| 3748 |
+
{"current_steps": 18645, "total_steps": 19240, "loss": 0.054, "lr": 1.4601044570579647e-07, "epoch": 9.690748440748441, "percentage": 96.91, "elapsed_time": "0:51:02", "remaining_time": "0:01:37", "throughput": 1160.74, "total_tokens": 3555040}
|
| 3749 |
+
{"current_steps": 18650, "total_steps": 19240, "loss": 0.0822, "lr": 1.4357321993513084e-07, "epoch": 9.693347193347194, "percentage": 96.93, "elapsed_time": "0:51:03", "remaining_time": "0:01:36", "throughput": 1160.77, "total_tokens": 3556032}
|
| 3750 |
+
{"current_steps": 18655, "total_steps": 19240, "loss": 0.024, "lr": 1.4115644840067833e-07, "epoch": 9.695945945945946, "percentage": 96.96, "elapsed_time": "0:51:04", "remaining_time": "0:01:36", "throughput": 1160.82, "total_tokens": 3557024}
|
| 3751 |
+
{"current_steps": 18660, "total_steps": 19240, "loss": 0.0883, "lr": 1.3876013309118697e-07, "epoch": 9.698544698544698, "percentage": 96.99, "elapsed_time": "0:51:04", "remaining_time": "0:01:35", "throughput": 1160.85, "total_tokens": 3557984}
|
| 3752 |
+
{"current_steps": 18665, "total_steps": 19240, "loss": 0.2261, "lr": 1.363842759785794e-07, "epoch": 9.701143451143452, "percentage": 97.01, "elapsed_time": "0:51:05", "remaining_time": "0:01:34", "throughput": 1160.89, "total_tokens": 3558976}
|
| 3753 |
+
{"current_steps": 18670, "total_steps": 19240, "loss": 0.2905, "lr": 1.3402887901793338e-07, "epoch": 9.703742203742204, "percentage": 97.04, "elapsed_time": "0:51:06", "remaining_time": "0:01:33", "throughput": 1160.9, "total_tokens": 3559872}
|
| 3754 |
+
{"current_steps": 18675, "total_steps": 19240, "loss": 0.0529, "lr": 1.316939441474957e-07, "epoch": 9.706340956340956, "percentage": 97.06, "elapsed_time": "0:51:07", "remaining_time": "0:01:32", "throughput": 1160.92, "total_tokens": 3560800}
|
| 3755 |
+
{"current_steps": 18680, "total_steps": 19240, "loss": 0.0445, "lr": 1.2937947328867106e-07, "epoch": 9.70893970893971, "percentage": 97.09, "elapsed_time": "0:51:07", "remaining_time": "0:01:31", "throughput": 1160.96, "total_tokens": 3561760}
|
| 3756 |
+
{"current_steps": 18685, "total_steps": 19240, "loss": 0.2553, "lr": 1.270854683460304e-07, "epoch": 9.711538461538462, "percentage": 97.12, "elapsed_time": "0:51:08", "remaining_time": "0:01:31", "throughput": 1160.98, "total_tokens": 3562688}
|
| 3757 |
+
{"current_steps": 18690, "total_steps": 19240, "loss": 0.0407, "lr": 1.2481193120729427e-07, "epoch": 9.714137214137214, "percentage": 97.14, "elapsed_time": "0:51:09", "remaining_time": "0:01:30", "throughput": 1161.04, "total_tokens": 3563744}
|
| 3758 |
+
{"current_steps": 18695, "total_steps": 19240, "loss": 0.0061, "lr": 1.2255886374334946e-07, "epoch": 9.716735966735968, "percentage": 97.17, "elapsed_time": "0:51:10", "remaining_time": "0:01:29", "throughput": 1161.07, "total_tokens": 3564672}
|
| 3759 |
+
{"current_steps": 18700, "total_steps": 19240, "loss": 0.2019, "lr": 1.203262678082323e-07, "epoch": 9.71933471933472, "percentage": 97.19, "elapsed_time": "0:51:10", "remaining_time": "0:01:28", "throughput": 1161.09, "total_tokens": 3565600}
|
| 3760 |
+
{"current_steps": 18705, "total_steps": 19240, "loss": 0.0147, "lr": 1.1811414523913711e-07, "epoch": 9.721933471933472, "percentage": 97.22, "elapsed_time": "0:51:11", "remaining_time": "0:01:27", "throughput": 1161.13, "total_tokens": 3566624}
|
| 3761 |
+
{"current_steps": 18710, "total_steps": 19240, "loss": 0.0181, "lr": 1.1592249785641052e-07, "epoch": 9.724532224532224, "percentage": 97.25, "elapsed_time": "0:51:12", "remaining_time": "0:01:27", "throughput": 1161.16, "total_tokens": 3567584}
|
| 3762 |
+
{"current_steps": 18715, "total_steps": 19240, "loss": 0.1037, "lr": 1.1375132746354322e-07, "epoch": 9.727130977130978, "percentage": 97.27, "elapsed_time": "0:51:13", "remaining_time": "0:01:26", "throughput": 1161.18, "total_tokens": 3568480}
|
| 3763 |
+
{"current_steps": 18720, "total_steps": 19240, "loss": 0.0137, "lr": 1.1160063584718661e-07, "epoch": 9.72972972972973, "percentage": 97.3, "elapsed_time": "0:51:13", "remaining_time": "0:01:25", "throughput": 1161.2, "total_tokens": 3569408}
|
| 3764 |
+
{"current_steps": 18725, "total_steps": 19240, "loss": 0.0076, "lr": 1.0947042477713332e-07, "epoch": 9.732328482328482, "percentage": 97.32, "elapsed_time": "0:51:14", "remaining_time": "0:01:24", "throughput": 1161.23, "total_tokens": 3570368}
|
| 3765 |
+
{"current_steps": 18730, "total_steps": 19240, "loss": 0.1021, "lr": 1.0736069600632281e-07, "epoch": 9.734927234927234, "percentage": 97.35, "elapsed_time": "0:51:15", "remaining_time": "0:01:23", "throughput": 1161.26, "total_tokens": 3571328}
|
| 3766 |
+
{"current_steps": 18735, "total_steps": 19240, "loss": 0.0366, "lr": 1.0527145127084136e-07, "epoch": 9.737525987525988, "percentage": 97.38, "elapsed_time": "0:51:16", "remaining_time": "0:01:22", "throughput": 1161.32, "total_tokens": 3572384}
|
| 3767 |
+
{"current_steps": 18740, "total_steps": 19240, "loss": 0.044, "lr": 1.032026922899193e-07, "epoch": 9.74012474012474, "percentage": 97.4, "elapsed_time": "0:51:16", "remaining_time": "0:01:22", "throughput": 1161.36, "total_tokens": 3573376}
|
| 3768 |
+
{"current_steps": 18745, "total_steps": 19240, "loss": 0.0071, "lr": 1.0115442076592541e-07, "epoch": 9.742723492723492, "percentage": 97.43, "elapsed_time": "0:51:17", "remaining_time": "0:01:21", "throughput": 1161.38, "total_tokens": 3574304}
|
| 3769 |
+
{"current_steps": 18750, "total_steps": 19240, "loss": 0.0068, "lr": 9.912663838437808e-08, "epoch": 9.745322245322246, "percentage": 97.45, "elapsed_time": "0:51:18", "remaining_time": "0:01:20", "throughput": 1161.44, "total_tokens": 3575328}
|
| 3770 |
+
{"current_steps": 18755, "total_steps": 19240, "loss": 0.1821, "lr": 9.711934681392587e-08, "epoch": 9.747920997920998, "percentage": 97.48, "elapsed_time": "0:51:19", "remaining_time": "0:01:19", "throughput": 1161.47, "total_tokens": 3576288}
|
| 3771 |
+
{"current_steps": 18760, "total_steps": 19240, "loss": 0.1546, "lr": 9.513254770636137e-08, "epoch": 9.75051975051975, "percentage": 97.51, "elapsed_time": "0:51:19", "remaining_time": "0:01:18", "throughput": 1161.52, "total_tokens": 3577312}
|
| 3772 |
+
{"current_steps": 18765, "total_steps": 19240, "loss": 0.0267, "lr": 9.31662426966129e-08, "epoch": 9.753118503118504, "percentage": 97.53, "elapsed_time": "0:51:20", "remaining_time": "0:01:17", "throughput": 1161.54, "total_tokens": 3578240}
|
| 3773 |
+
{"current_steps": 18770, "total_steps": 19240, "loss": 0.1005, "lr": 9.122043340273889e-08, "epoch": 9.755717255717256, "percentage": 97.56, "elapsed_time": "0:51:21", "remaining_time": "0:01:17", "throughput": 1161.57, "total_tokens": 3579168}
|
| 3774 |
+
{"current_steps": 18775, "total_steps": 19240, "loss": 0.0847, "lr": 8.929512142594187e-08, "epoch": 9.758316008316008, "percentage": 97.58, "elapsed_time": "0:51:22", "remaining_time": "0:01:16", "throughput": 1161.59, "total_tokens": 3580096}
|
| 3775 |
+
{"current_steps": 18780, "total_steps": 19240, "loss": 0.1136, "lr": 8.739030835055173e-08, "epoch": 9.76091476091476, "percentage": 97.61, "elapsed_time": "0:51:22", "remaining_time": "0:01:15", "throughput": 1161.6, "total_tokens": 3580992}
|
| 3776 |
+
{"current_steps": 18785, "total_steps": 19240, "loss": 0.012, "lr": 8.550599574402574e-08, "epoch": 9.763513513513514, "percentage": 97.64, "elapsed_time": "0:51:23", "remaining_time": "0:01:14", "throughput": 1161.64, "total_tokens": 3581984}
|
| 3777 |
+
{"current_steps": 18790, "total_steps": 19240, "loss": 0.0127, "lr": 8.364218515695965e-08, "epoch": 9.766112266112266, "percentage": 97.66, "elapsed_time": "0:51:24", "remaining_time": "0:01:13", "throughput": 1161.67, "total_tokens": 3582912}
|
| 3778 |
+
{"current_steps": 18795, "total_steps": 19240, "loss": 0.0173, "lr": 8.179887812307386e-08, "epoch": 9.768711018711018, "percentage": 97.69, "elapsed_time": "0:51:25", "remaining_time": "0:01:13", "throughput": 1161.7, "total_tokens": 3583872}
|
| 3779 |
+
{"current_steps": 18800, "total_steps": 19240, "loss": 0.1184, "lr": 7.99760761592161e-08, "epoch": 9.771309771309772, "percentage": 97.71, "elapsed_time": "0:51:25", "remaining_time": "0:01:12", "throughput": 1161.71, "total_tokens": 3584768}
|
| 3780 |
+
{"current_steps": 18805, "total_steps": 19240, "loss": 0.1372, "lr": 7.817378076536153e-08, "epoch": 9.773908523908524, "percentage": 97.74, "elapsed_time": "0:51:26", "remaining_time": "0:01:11", "throughput": 1161.73, "total_tokens": 3585664}
|
| 3781 |
+
{"current_steps": 18810, "total_steps": 19240, "loss": 0.0592, "lr": 7.63919934246099e-08, "epoch": 9.776507276507276, "percentage": 97.77, "elapsed_time": "0:51:27", "remaining_time": "0:01:10", "throughput": 1161.75, "total_tokens": 3586624}
|
| 3782 |
+
{"current_steps": 18815, "total_steps": 19240, "loss": 0.118, "lr": 7.463071560318835e-08, "epoch": 9.779106029106028, "percentage": 97.79, "elapsed_time": "0:51:27", "remaining_time": "0:01:09", "throughput": 1161.79, "total_tokens": 3587616}
|
| 3783 |
+
{"current_steps": 18820, "total_steps": 19240, "loss": 0.12, "lr": 7.288994875044308e-08, "epoch": 9.781704781704782, "percentage": 97.82, "elapsed_time": "0:51:28", "remaining_time": "0:01:08", "throughput": 1161.82, "total_tokens": 3588544}
|
| 3784 |
+
{"current_steps": 18825, "total_steps": 19240, "loss": 0.2117, "lr": 7.116969429883935e-08, "epoch": 9.784303534303534, "percentage": 97.84, "elapsed_time": "0:51:29", "remaining_time": "0:01:08", "throughput": 1161.84, "total_tokens": 3589472}
|
| 3785 |
+
{"current_steps": 18830, "total_steps": 19240, "loss": 0.0524, "lr": 6.946995366397257e-08, "epoch": 9.786902286902286, "percentage": 97.87, "elapsed_time": "0:51:30", "remaining_time": "0:01:07", "throughput": 1161.87, "total_tokens": 3590432}
|
| 3786 |
+
{"current_steps": 18835, "total_steps": 19240, "loss": 0.0762, "lr": 6.779072824454614e-08, "epoch": 9.78950103950104, "percentage": 97.9, "elapsed_time": "0:51:30", "remaining_time": "0:01:06", "throughput": 1161.91, "total_tokens": 3591424}
|
| 3787 |
+
{"current_steps": 18840, "total_steps": 19240, "loss": 0.0327, "lr": 6.6132019422388e-08, "epoch": 9.792099792099792, "percentage": 97.92, "elapsed_time": "0:51:31", "remaining_time": "0:01:05", "throughput": 1161.96, "total_tokens": 3592448}
|
| 3788 |
+
{"current_steps": 18845, "total_steps": 19240, "loss": 0.1824, "lr": 6.449382856244246e-08, "epoch": 9.794698544698544, "percentage": 97.95, "elapsed_time": "0:51:32", "remaining_time": "0:01:04", "throughput": 1162.01, "total_tokens": 3593472}
|
| 3789 |
+
{"current_steps": 18850, "total_steps": 19240, "loss": 0.2351, "lr": 6.287615701277005e-08, "epoch": 9.797297297297296, "percentage": 97.97, "elapsed_time": "0:51:33", "remaining_time": "0:01:03", "throughput": 1162.07, "total_tokens": 3594496}
|
| 3790 |
+
{"current_steps": 18855, "total_steps": 19240, "loss": 0.0437, "lr": 6.127900610454207e-08, "epoch": 9.79989604989605, "percentage": 98.0, "elapsed_time": "0:51:33", "remaining_time": "0:01:03", "throughput": 1162.1, "total_tokens": 3595456}
|
| 3791 |
+
{"current_steps": 18860, "total_steps": 19240, "loss": 0.1413, "lr": 5.970237715204885e-08, "epoch": 9.802494802494802, "percentage": 98.02, "elapsed_time": "0:51:34", "remaining_time": "0:01:02", "throughput": 1162.11, "total_tokens": 3596352}
|
| 3792 |
+
{"current_steps": 18865, "total_steps": 19240, "loss": 0.07, "lr": 5.814627145269147e-08, "epoch": 9.805093555093555, "percentage": 98.05, "elapsed_time": "0:51:35", "remaining_time": "0:01:01", "throughput": 1162.15, "total_tokens": 3597344}
|
| 3793 |
+
{"current_steps": 18870, "total_steps": 19240, "loss": 0.0058, "lr": 5.661069028697896e-08, "epoch": 9.807692307692308, "percentage": 98.08, "elapsed_time": "0:51:36", "remaining_time": "0:01:00", "throughput": 1162.17, "total_tokens": 3598272}
|
| 3794 |
+
{"current_steps": 18875, "total_steps": 19240, "loss": 0.0814, "lr": 5.509563491853942e-08, "epoch": 9.81029106029106, "percentage": 98.1, "elapsed_time": "0:51:36", "remaining_time": "0:00:59", "throughput": 1162.2, "total_tokens": 3599200}
|
| 3795 |
+
{"current_steps": 18880, "total_steps": 19240, "loss": 0.0137, "lr": 5.3601106594097784e-08, "epoch": 9.812889812889813, "percentage": 98.13, "elapsed_time": "0:51:37", "remaining_time": "0:00:59", "throughput": 1162.24, "total_tokens": 3600224}
|
| 3796 |
+
{"current_steps": 18885, "total_steps": 19240, "loss": 0.0821, "lr": 5.2127106543498063e-08, "epoch": 9.815488565488565, "percentage": 98.15, "elapsed_time": "0:51:38", "remaining_time": "0:00:58", "throughput": 1162.26, "total_tokens": 3601120}
|
| 3797 |
+
{"current_steps": 18890, "total_steps": 19240, "loss": 0.0738, "lr": 5.0673635979686665e-08, "epoch": 9.818087318087318, "percentage": 98.18, "elapsed_time": "0:51:39", "remaining_time": "0:00:57", "throughput": 1162.29, "total_tokens": 3602080}
|
| 3798 |
+
{"current_steps": 18895, "total_steps": 19240, "loss": 0.0103, "lr": 4.924069609872073e-08, "epoch": 9.82068607068607, "percentage": 98.21, "elapsed_time": "0:51:39", "remaining_time": "0:00:56", "throughput": 1162.33, "total_tokens": 3603072}
|
| 3799 |
+
{"current_steps": 18900, "total_steps": 19240, "loss": 0.1611, "lr": 4.7828288079757035e-08, "epoch": 9.823284823284823, "percentage": 98.23, "elapsed_time": "0:51:40", "remaining_time": "0:00:55", "throughput": 1162.36, "total_tokens": 3604032}
|
| 3800 |
+
{"current_steps": 18905, "total_steps": 19240, "loss": 0.0321, "lr": 4.643641308505753e-08, "epoch": 9.825883575883577, "percentage": 98.26, "elapsed_time": "0:51:41", "remaining_time": "0:00:54", "throughput": 1162.38, "total_tokens": 3604960}
|
| 3801 |
+
{"current_steps": 18910, "total_steps": 19240, "loss": 0.007, "lr": 4.50650722599949e-08, "epoch": 9.828482328482329, "percentage": 98.28, "elapsed_time": "0:51:42", "remaining_time": "0:00:54", "throughput": 1162.39, "total_tokens": 3605856}
|
| 3802 |
+
{"current_steps": 18915, "total_steps": 19240, "loss": 0.0089, "lr": 4.3714266733035914e-08, "epoch": 9.83108108108108, "percentage": 98.31, "elapsed_time": "0:51:42", "remaining_time": "0:00:53", "throughput": 1162.42, "total_tokens": 3606816}
|
| 3803 |
+
{"current_steps": 18920, "total_steps": 19240, "loss": 0.0751, "lr": 4.238399761574974e-08, "epoch": 9.833679833679835, "percentage": 98.34, "elapsed_time": "0:51:43", "remaining_time": "0:00:52", "throughput": 1162.45, "total_tokens": 3607776}
|
| 3804 |
+
{"current_steps": 18925, "total_steps": 19240, "loss": 0.0102, "lr": 4.10742660028135e-08, "epoch": 9.836278586278587, "percentage": 98.36, "elapsed_time": "0:51:44", "remaining_time": "0:00:51", "throughput": 1162.48, "total_tokens": 3608736}
|
| 3805 |
+
{"current_steps": 18930, "total_steps": 19240, "loss": 0.0081, "lr": 3.978507297199285e-08, "epoch": 9.838877338877339, "percentage": 98.39, "elapsed_time": "0:51:45", "remaining_time": "0:00:50", "throughput": 1162.52, "total_tokens": 3609728}
|
| 3806 |
+
{"current_steps": 18935, "total_steps": 19240, "loss": 0.1115, "lr": 3.851641958416696e-08, "epoch": 9.84147609147609, "percentage": 98.41, "elapsed_time": "0:51:45", "remaining_time": "0:00:50", "throughput": 1162.57, "total_tokens": 3610720}
|
| 3807 |
+
{"current_steps": 18940, "total_steps": 19240, "loss": 0.0942, "lr": 3.7268306883297966e-08, "epoch": 9.844074844074845, "percentage": 98.44, "elapsed_time": "0:51:46", "remaining_time": "0:00:49", "throughput": 1162.59, "total_tokens": 3611648}
|
| 3808 |
+
{"current_steps": 18945, "total_steps": 19240, "loss": 0.1231, "lr": 3.604073589645596e-08, "epoch": 9.846673596673597, "percentage": 98.47, "elapsed_time": "0:51:47", "remaining_time": "0:00:48", "throughput": 1162.62, "total_tokens": 3612608}
|
| 3809 |
+
{"current_steps": 18950, "total_steps": 19240, "loss": 0.0786, "lr": 3.4833707633799565e-08, "epoch": 9.849272349272349, "percentage": 98.49, "elapsed_time": "0:51:48", "remaining_time": "0:00:47", "throughput": 1162.66, "total_tokens": 3613600}
|
| 3810 |
+
{"current_steps": 18955, "total_steps": 19240, "loss": 0.1689, "lr": 3.3647223088589805e-08, "epoch": 9.851871101871101, "percentage": 98.52, "elapsed_time": "0:51:48", "remaining_time": "0:00:46", "throughput": 1162.69, "total_tokens": 3614560}
|
| 3811 |
+
{"current_steps": 18960, "total_steps": 19240, "loss": 0.0082, "lr": 3.248128323717625e-08, "epoch": 9.854469854469855, "percentage": 98.54, "elapsed_time": "0:51:49", "remaining_time": "0:00:45", "throughput": 1162.71, "total_tokens": 3615488}
|
| 3812 |
+
{"current_steps": 18965, "total_steps": 19240, "loss": 0.0138, "lr": 3.133588903900808e-08, "epoch": 9.857068607068607, "percentage": 98.57, "elapsed_time": "0:51:50", "remaining_time": "0:00:45", "throughput": 1162.74, "total_tokens": 3616448}
|
| 3813 |
+
{"current_steps": 18970, "total_steps": 19240, "loss": 0.0727, "lr": 3.021104143662301e-08, "epoch": 9.859667359667359, "percentage": 98.6, "elapsed_time": "0:51:51", "remaining_time": "0:00:44", "throughput": 1162.75, "total_tokens": 3617312}
|
| 3814 |
+
{"current_steps": 18975, "total_steps": 19240, "loss": 0.0187, "lr": 2.910674135565561e-08, "epoch": 9.862266112266113, "percentage": 98.62, "elapsed_time": "0:51:51", "remaining_time": "0:00:43", "throughput": 1162.78, "total_tokens": 3618272}
|
| 3815 |
+
{"current_steps": 18980, "total_steps": 19240, "loss": 0.0078, "lr": 2.8022989704826196e-08, "epoch": 9.864864864864865, "percentage": 98.65, "elapsed_time": "0:51:52", "remaining_time": "0:00:42", "throughput": 1162.78, "total_tokens": 3619136}
|
| 3816 |
+
{"current_steps": 18985, "total_steps": 19240, "loss": 0.0324, "lr": 2.6959787375949174e-08, "epoch": 9.867463617463617, "percentage": 98.67, "elapsed_time": "0:51:53", "remaining_time": "0:00:41", "throughput": 1162.83, "total_tokens": 3620160}
|
| 3817 |
+
{"current_steps": 18990, "total_steps": 19240, "loss": 0.0186, "lr": 2.5917135243930245e-08, "epoch": 9.87006237006237, "percentage": 98.7, "elapsed_time": "0:51:53", "remaining_time": "0:00:40", "throughput": 1162.85, "total_tokens": 3621088}
|
| 3818 |
+
{"current_steps": 18995, "total_steps": 19240, "loss": 0.0069, "lr": 2.4895034166760865e-08, "epoch": 9.872661122661123, "percentage": 98.73, "elapsed_time": "0:51:54", "remaining_time": "0:00:40", "throughput": 1162.88, "total_tokens": 3622048}
|
| 3819 |
+
{"current_steps": 19000, "total_steps": 19240, "loss": 0.2013, "lr": 2.389348498552657e-08, "epoch": 9.875259875259875, "percentage": 98.75, "elapsed_time": "0:51:55", "remaining_time": "0:00:39", "throughput": 1162.91, "total_tokens": 3622976}
|
| 3820 |
+
{"current_steps": 19005, "total_steps": 19240, "loss": 0.0949, "lr": 2.2912488524393095e-08, "epoch": 9.877858627858627, "percentage": 98.78, "elapsed_time": "0:51:56", "remaining_time": "0:00:38", "throughput": 1162.95, "total_tokens": 3623968}
|
| 3821 |
+
{"current_steps": 19010, "total_steps": 19240, "loss": 0.0905, "lr": 2.1952045590620253e-08, "epoch": 9.880457380457381, "percentage": 98.8, "elapsed_time": "0:51:56", "remaining_time": "0:00:37", "throughput": 1162.99, "total_tokens": 3624960}
|
| 3822 |
+
{"current_steps": 19015, "total_steps": 19240, "loss": 0.0868, "lr": 2.101215697455361e-08, "epoch": 9.883056133056133, "percentage": 98.83, "elapsed_time": "0:51:57", "remaining_time": "0:00:36", "throughput": 1163.03, "total_tokens": 3625952}
|
| 3823 |
+
{"current_steps": 19020, "total_steps": 19240, "loss": 0.0121, "lr": 2.0092823449618935e-08, "epoch": 9.885654885654885, "percentage": 98.86, "elapsed_time": "0:51:58", "remaining_time": "0:00:36", "throughput": 1163.07, "total_tokens": 3626944}
|
| 3824 |
+
{"current_steps": 19025, "total_steps": 19240, "loss": 0.2486, "lr": 1.9194045772336077e-08, "epoch": 9.888253638253639, "percentage": 98.88, "elapsed_time": "0:51:59", "remaining_time": "0:00:35", "throughput": 1163.12, "total_tokens": 3627968}
|
| 3825 |
+
{"current_steps": 19030, "total_steps": 19240, "loss": 0.1517, "lr": 1.831582468229953e-08, "epoch": 9.890852390852391, "percentage": 98.91, "elapsed_time": "0:51:59", "remaining_time": "0:00:34", "throughput": 1163.15, "total_tokens": 3628928}
|
| 3826 |
+
{"current_steps": 19035, "total_steps": 19240, "loss": 0.0127, "lr": 1.7458160902197872e-08, "epoch": 9.893451143451143, "percentage": 98.93, "elapsed_time": "0:52:00", "remaining_time": "0:00:33", "throughput": 1163.18, "total_tokens": 3629888}
|
| 3827 |
+
{"current_steps": 19040, "total_steps": 19240, "loss": 0.1736, "lr": 1.6621055137797105e-08, "epoch": 9.896049896049895, "percentage": 98.96, "elapsed_time": "0:52:01", "remaining_time": "0:00:32", "throughput": 1163.24, "total_tokens": 3630944}
|
| 3828 |
+
{"current_steps": 19045, "total_steps": 19240, "loss": 0.1473, "lr": 1.5804508077946202e-08, "epoch": 9.89864864864865, "percentage": 98.99, "elapsed_time": "0:52:02", "remaining_time": "0:00:31", "throughput": 1163.27, "total_tokens": 3631904}
|
| 3829 |
+
{"current_steps": 19050, "total_steps": 19240, "loss": 0.0553, "lr": 1.500852039458267e-08, "epoch": 9.901247401247401, "percentage": 99.01, "elapsed_time": "0:52:02", "remaining_time": "0:00:31", "throughput": 1163.3, "total_tokens": 3632832}
|
| 3830 |
+
{"current_steps": 19055, "total_steps": 19240, "loss": 0.0066, "lr": 1.4233092742713116e-08, "epoch": 9.903846153846153, "percentage": 99.04, "elapsed_time": "0:52:03", "remaining_time": "0:00:30", "throughput": 1163.32, "total_tokens": 3633760}
|
| 3831 |
+
{"current_steps": 19060, "total_steps": 19240, "loss": 0.0559, "lr": 1.3478225760441e-08, "epoch": 9.906444906444907, "percentage": 99.06, "elapsed_time": "0:52:04", "remaining_time": "0:00:29", "throughput": 1163.36, "total_tokens": 3634752}
|
| 3832 |
+
{"current_steps": 19065, "total_steps": 19240, "loss": 0.1236, "lr": 1.2743920068938874e-08, "epoch": 9.90904365904366, "percentage": 99.09, "elapsed_time": "0:52:05", "remaining_time": "0:00:28", "throughput": 1163.38, "total_tokens": 3635680}
|
| 3833 |
+
{"current_steps": 19070, "total_steps": 19240, "loss": 0.31, "lr": 1.203017627246228e-08, "epoch": 9.911642411642411, "percentage": 99.12, "elapsed_time": "0:52:05", "remaining_time": "0:00:27", "throughput": 1163.42, "total_tokens": 3636672}
|
| 3834 |
+
{"current_steps": 19075, "total_steps": 19240, "loss": 0.008, "lr": 1.1336994958349723e-08, "epoch": 9.914241164241163, "percentage": 99.14, "elapsed_time": "0:52:06", "remaining_time": "0:00:27", "throughput": 1163.45, "total_tokens": 3637632}
|
| 3835 |
+
{"current_steps": 19080, "total_steps": 19240, "loss": 0.0782, "lr": 1.0664376697017142e-08, "epoch": 9.916839916839917, "percentage": 99.17, "elapsed_time": "0:52:07", "remaining_time": "0:00:26", "throughput": 1163.49, "total_tokens": 3638592}
|
| 3836 |
+
{"current_steps": 19085, "total_steps": 19240, "loss": 0.2141, "lr": 1.0012322041960676e-08, "epoch": 9.91943866943867, "percentage": 99.19, "elapsed_time": "0:52:08", "remaining_time": "0:00:25", "throughput": 1163.52, "total_tokens": 3639552}
|
| 3837 |
+
{"current_steps": 19090, "total_steps": 19240, "loss": 0.2006, "lr": 9.38083152974556e-09, "epoch": 9.922037422037421, "percentage": 99.22, "elapsed_time": "0:52:08", "remaining_time": "0:00:24", "throughput": 1163.54, "total_tokens": 3640480}
|
| 3838 |
+
{"current_steps": 19095, "total_steps": 19240, "loss": 0.006, "lr": 8.76990568003111e-09, "epoch": 9.924636174636175, "percentage": 99.25, "elapsed_time": "0:52:09", "remaining_time": "0:00:23", "throughput": 1163.58, "total_tokens": 3641472}
|
| 3839 |
+
{"current_steps": 19100, "total_steps": 19240, "loss": 0.2006, "lr": 8.17954499554019e-09, "epoch": 9.927234927234927, "percentage": 99.27, "elapsed_time": "0:52:10", "remaining_time": "0:00:22", "throughput": 1163.62, "total_tokens": 3642464}
|
| 3840 |
+
{"current_steps": 19105, "total_steps": 19240, "loss": 0.1495, "lr": 7.609749962081413e-09, "epoch": 9.92983367983368, "percentage": 99.3, "elapsed_time": "0:52:11", "remaining_time": "0:00:22", "throughput": 1163.66, "total_tokens": 3643456}
|
| 3841 |
+
{"current_steps": 19110, "total_steps": 19240, "loss": 0.0927, "lr": 7.060521048532498e-09, "epoch": 9.932432432432432, "percentage": 99.32, "elapsed_time": "0:52:11", "remaining_time": "0:00:21", "throughput": 1163.67, "total_tokens": 3644352}
|
| 3842 |
+
{"current_steps": 19115, "total_steps": 19240, "loss": 0.0744, "lr": 6.5318587068541325e-09, "epoch": 9.935031185031185, "percentage": 99.35, "elapsed_time": "0:52:12", "remaining_time": "0:00:20", "throughput": 1163.71, "total_tokens": 3645344}
|
| 3843 |
+
{"current_steps": 19120, "total_steps": 19240, "loss": 0.1918, "lr": 6.023763372076108e-09, "epoch": 9.937629937629938, "percentage": 99.38, "elapsed_time": "0:52:13", "remaining_time": "0:00:19", "throughput": 1163.75, "total_tokens": 3646336}
|
| 3844 |
+
{"current_steps": 19125, "total_steps": 19240, "loss": 0.0743, "lr": 5.536235462313965e-09, "epoch": 9.94022869022869, "percentage": 99.4, "elapsed_time": "0:52:14", "remaining_time": "0:00:18", "throughput": 1163.78, "total_tokens": 3647296}
|
| 3845 |
+
{"current_steps": 19130, "total_steps": 19240, "loss": 0.0255, "lr": 5.069275378746796e-09, "epoch": 9.942827442827443, "percentage": 99.43, "elapsed_time": "0:52:14", "remaining_time": "0:00:18", "throughput": 1163.8, "total_tokens": 3648224}
|
| 3846 |
+
{"current_steps": 19135, "total_steps": 19240, "loss": 0.0087, "lr": 4.622883505636666e-09, "epoch": 9.945426195426196, "percentage": 99.45, "elapsed_time": "0:52:15", "remaining_time": "0:00:17", "throughput": 1163.82, "total_tokens": 3649152}
|
| 3847 |
+
{"current_steps": 19140, "total_steps": 19240, "loss": 0.0071, "lr": 4.197060210317516e-09, "epoch": 9.948024948024948, "percentage": 99.48, "elapsed_time": "0:52:16", "remaining_time": "0:00:16", "throughput": 1163.84, "total_tokens": 3650048}
|
| 3848 |
+
{"current_steps": 19145, "total_steps": 19240, "loss": 0.2545, "lr": 3.791805843195162e-09, "epoch": 9.950623700623701, "percentage": 99.51, "elapsed_time": "0:52:16", "remaining_time": "0:00:15", "throughput": 1163.85, "total_tokens": 3650944}
|
| 3849 |
+
{"current_steps": 19150, "total_steps": 19240, "loss": 0.1081, "lr": 3.4071207377500693e-09, "epoch": 9.953222453222454, "percentage": 99.53, "elapsed_time": "0:52:17", "remaining_time": "0:00:14", "throughput": 1163.89, "total_tokens": 3651936}
|
| 3850 |
+
{"current_steps": 19155, "total_steps": 19240, "loss": 0.1354, "lr": 3.043005210542904e-09, "epoch": 9.955821205821206, "percentage": 99.56, "elapsed_time": "0:52:18", "remaining_time": "0:00:13", "throughput": 1163.91, "total_tokens": 3652864}
|
| 3851 |
+
{"current_steps": 19160, "total_steps": 19240, "loss": 0.1587, "lr": 2.6994595612006566e-09, "epoch": 9.958419958419958, "percentage": 99.58, "elapsed_time": "0:52:19", "remaining_time": "0:00:13", "throughput": 1163.95, "total_tokens": 3653856}
|
| 3852 |
+
{"current_steps": 19165, "total_steps": 19240, "loss": 0.134, "lr": 2.376484072424967e-09, "epoch": 9.961018711018712, "percentage": 99.61, "elapsed_time": "0:52:19", "remaining_time": "0:00:12", "throughput": 1163.97, "total_tokens": 3654784}
|
| 3853 |
+
{"current_steps": 19170, "total_steps": 19240, "loss": 0.0058, "lr": 2.074079009989349e-09, "epoch": 9.963617463617464, "percentage": 99.64, "elapsed_time": "0:52:20", "remaining_time": "0:00:11", "throughput": 1163.99, "total_tokens": 3655712}
|
| 3854 |
+
{"current_steps": 19175, "total_steps": 19240, "loss": 0.0583, "lr": 1.7922446227447432e-09, "epoch": 9.966216216216216, "percentage": 99.66, "elapsed_time": "0:52:21", "remaining_time": "0:00:10", "throughput": 1164.03, "total_tokens": 3656704}
|
| 3855 |
+
{"current_steps": 19180, "total_steps": 19240, "loss": 0.0214, "lr": 1.5309811426056364e-09, "epoch": 9.96881496881497, "percentage": 99.69, "elapsed_time": "0:52:22", "remaining_time": "0:00:09", "throughput": 1164.07, "total_tokens": 3657664}
|
| 3856 |
+
{"current_steps": 19185, "total_steps": 19240, "loss": 0.0695, "lr": 1.2902887845722688e-09, "epoch": 9.971413721413722, "percentage": 99.71, "elapsed_time": "0:52:22", "remaining_time": "0:00:09", "throughput": 1164.11, "total_tokens": 3658656}
|
| 3857 |
+
{"current_steps": 19190, "total_steps": 19240, "loss": 0.088, "lr": 1.070167746702877e-09, "epoch": 9.974012474012474, "percentage": 99.74, "elapsed_time": "0:52:23", "remaining_time": "0:00:08", "throughput": 1164.14, "total_tokens": 3659616}
|
| 3858 |
+
{"current_steps": 19195, "total_steps": 19240, "loss": 0.0422, "lr": 8.70618210138674e-10, "epoch": 9.976611226611226, "percentage": 99.77, "elapsed_time": "0:52:24", "remaining_time": "0:00:07", "throughput": 1164.19, "total_tokens": 3660640}
|
| 3859 |
+
{"current_steps": 19200, "total_steps": 19240, "loss": 0.0075, "lr": 6.916403390844206e-10, "epoch": 9.97920997920998, "percentage": 99.79, "elapsed_time": "0:52:25", "remaining_time": "0:00:06", "throughput": 1164.21, "total_tokens": 3661568}
|
| 3860 |
+
{"current_steps": 19205, "total_steps": 19240, "loss": 0.0077, "lr": 5.332342808223034e-10, "epoch": 9.981808731808732, "percentage": 99.82, "elapsed_time": "0:52:25", "remaining_time": "0:00:05", "throughput": 1164.25, "total_tokens": 3662560}
|
| 3861 |
+
{"current_steps": 19210, "total_steps": 19240, "loss": 0.1795, "lr": 3.9540016570083215e-10, "epoch": 9.984407484407484, "percentage": 99.84, "elapsed_time": "0:52:26", "remaining_time": "0:00:04", "throughput": 1164.28, "total_tokens": 3663520}
|
| 3862 |
+
{"current_steps": 19215, "total_steps": 19240, "loss": 0.0055, "lr": 2.7813810714871767e-10, "epoch": 9.987006237006238, "percentage": 99.87, "elapsed_time": "0:52:27", "remaining_time": "0:00:04", "throughput": 1164.29, "total_tokens": 3664416}
|
| 3863 |
+
{"current_steps": 19220, "total_steps": 19240, "loss": 0.0823, "lr": 1.8144820165544307e-10, "epoch": 9.98960498960499, "percentage": 99.9, "elapsed_time": "0:52:28", "remaining_time": "0:00:03", "throughput": 1164.31, "total_tokens": 3665312}
|
| 3864 |
+
{"current_steps": 19225, "total_steps": 19240, "loss": 0.0063, "lr": 1.0533052878791694e-10, "epoch": 9.992203742203742, "percentage": 99.92, "elapsed_time": "0:52:28", "remaining_time": "0:00:02", "throughput": 1164.34, "total_tokens": 3666272}
|
| 3865 |
+
{"current_steps": 19230, "total_steps": 19240, "loss": 0.0063, "lr": 4.978515118214677e-11, "epoch": 9.994802494802494, "percentage": 99.95, "elapsed_time": "0:52:29", "remaining_time": "0:00:01", "throughput": 1164.36, "total_tokens": 3667200}
|
| 3866 |
+
{"current_steps": 19235, "total_steps": 19240, "loss": 0.0692, "lr": 1.4812114548790057e-11, "epoch": 9.997401247401248, "percentage": 99.97, "elapsed_time": "0:52:30", "remaining_time": "0:00:00", "throughput": 1164.41, "total_tokens": 3668256}
|
| 3867 |
+
{"current_steps": 19240, "total_steps": 19240, "loss": 0.1406, "lr": 4.114476648275911e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:52:31", "remaining_time": "0:00:00", "throughput": 1164.4, "total_tokens": 3669168}
|
| 3868 |
+
{"current_steps": 19240, "total_steps": 19240, "eval_loss": 0.20372864603996277, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:52:44", "remaining_time": "0:00:00", "throughput": 1159.34, "total_tokens": 3669168}
|
| 3869 |
+
{"current_steps": 19240, "total_steps": 19240, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:52:46", "remaining_time": "0:00:00", "throughput": 1158.78, "total_tokens": 3669168}
|