Training in progress, step 21210
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +209 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 26214528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:42d96fd662f97f5d59dd95cb31729f8c23a14b8d1952e97f7625b975843bfe5c
|
| 3 |
size 26214528
|
trainer_log.jsonl
CHANGED
|
@@ -4051,3 +4051,212 @@
|
|
| 4051 |
{"current_steps": 20160, "total_steps": 21210, "loss": 0.332, "lr": 3.730489142385857e-07, "epoch": 9.504950495049505, "percentage": 95.05, "elapsed_time": "1:27:59", "remaining_time": "0:04:34", "throughput": 3843.23, "total_tokens": 20291104}
|
| 4052 |
{"current_steps": 20165, "total_steps": 21210, "loss": 0.3198, "lr": 3.695166400837669e-07, "epoch": 9.507307873644507, "percentage": 95.07, "elapsed_time": "1:28:00", "remaining_time": "0:04:33", "throughput": 3843.3, "total_tokens": 20295392}
|
| 4053 |
{"current_steps": 20170, "total_steps": 21210, "loss": 0.3431, "lr": 3.6600104401876834e-07, "epoch": 9.50966525223951, "percentage": 95.1, "elapsed_time": "1:28:01", "remaining_time": "0:04:32", "throughput": 3843.38, "total_tokens": 20300192}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4051 |
{"current_steps": 20160, "total_steps": 21210, "loss": 0.332, "lr": 3.730489142385857e-07, "epoch": 9.504950495049505, "percentage": 95.05, "elapsed_time": "1:27:59", "remaining_time": "0:04:34", "throughput": 3843.23, "total_tokens": 20291104}
|
| 4052 |
{"current_steps": 20165, "total_steps": 21210, "loss": 0.3198, "lr": 3.695166400837669e-07, "epoch": 9.507307873644507, "percentage": 95.07, "elapsed_time": "1:28:00", "remaining_time": "0:04:33", "throughput": 3843.3, "total_tokens": 20295392}
|
| 4053 |
{"current_steps": 20170, "total_steps": 21210, "loss": 0.3431, "lr": 3.6600104401876834e-07, "epoch": 9.50966525223951, "percentage": 95.1, "elapsed_time": "1:28:01", "remaining_time": "0:04:32", "throughput": 3843.38, "total_tokens": 20300192}
|
| 4054 |
+
{"current_steps": 20175, "total_steps": 21210, "loss": 0.3251, "lr": 3.6250212842410814e-07, "epoch": 9.512022630834512, "percentage": 95.12, "elapsed_time": "1:28:02", "remaining_time": "0:04:31", "throughput": 3843.45, "total_tokens": 20304672}
|
| 4055 |
+
{"current_steps": 20180, "total_steps": 21210, "loss": 0.3622, "lr": 3.590198956690216e-07, "epoch": 9.514380009429514, "percentage": 95.14, "elapsed_time": "1:28:04", "remaining_time": "0:04:29", "throughput": 3843.51, "total_tokens": 20309408}
|
| 4056 |
+
{"current_steps": 20185, "total_steps": 21210, "loss": 0.2888, "lr": 3.555543481114337e-07, "epoch": 9.516737388024517, "percentage": 95.17, "elapsed_time": "1:28:05", "remaining_time": "0:04:28", "throughput": 3843.68, "total_tokens": 20316320}
|
| 4057 |
+
{"current_steps": 20190, "total_steps": 21210, "loss": 0.3651, "lr": 3.521054880979785e-07, "epoch": 9.519094766619519, "percentage": 95.19, "elapsed_time": "1:28:06", "remaining_time": "0:04:27", "throughput": 3843.78, "total_tokens": 20321248}
|
| 4058 |
+
{"current_steps": 20195, "total_steps": 21210, "loss": 0.3454, "lr": 3.486733179639906e-07, "epoch": 9.521452145214521, "percentage": 95.21, "elapsed_time": "1:28:08", "remaining_time": "0:04:25", "throughput": 3843.9, "total_tokens": 20326944}
|
| 4059 |
+
{"current_steps": 20200, "total_steps": 21210, "loss": 0.3416, "lr": 3.452578400335027e-07, "epoch": 9.523809523809524, "percentage": 95.24, "elapsed_time": "1:28:09", "remaining_time": "0:04:24", "throughput": 3844.0, "total_tokens": 20332096}
|
| 4060 |
+
{"current_steps": 20205, "total_steps": 21210, "loss": 0.3481, "lr": 3.4185905661924534e-07, "epoch": 9.526166902404526, "percentage": 95.26, "elapsed_time": "1:28:10", "remaining_time": "0:04:23", "throughput": 3844.1, "total_tokens": 20337216}
|
| 4061 |
+
{"current_steps": 20210, "total_steps": 21210, "loss": 0.3569, "lr": 3.3847697002264147e-07, "epoch": 9.528524280999529, "percentage": 95.29, "elapsed_time": "1:28:11", "remaining_time": "0:04:21", "throughput": 3844.21, "total_tokens": 20342336}
|
| 4062 |
+
{"current_steps": 20215, "total_steps": 21210, "loss": 0.3023, "lr": 3.351115825338119e-07, "epoch": 9.530881659594531, "percentage": 95.31, "elapsed_time": "1:28:12", "remaining_time": "0:04:20", "throughput": 3844.33, "total_tokens": 20347616}
|
| 4063 |
+
{"current_steps": 20220, "total_steps": 21210, "loss": 0.3543, "lr": 3.3176289643157e-07, "epoch": 9.533239038189533, "percentage": 95.33, "elapsed_time": "1:28:13", "remaining_time": "0:04:19", "throughput": 3844.4, "total_tokens": 20352096}
|
| 4064 |
+
{"current_steps": 20225, "total_steps": 21210, "loss": 0.2687, "lr": 3.28430913983413e-07, "epoch": 9.535596416784536, "percentage": 95.36, "elapsed_time": "1:28:15", "remaining_time": "0:04:17", "throughput": 3844.49, "total_tokens": 20357376}
|
| 4065 |
+
{"current_steps": 20230, "total_steps": 21210, "loss": 0.2984, "lr": 3.2511563744553884e-07, "epoch": 9.537953795379538, "percentage": 95.38, "elapsed_time": "1:28:16", "remaining_time": "0:04:16", "throughput": 3844.56, "total_tokens": 20361824}
|
| 4066 |
+
{"current_steps": 20235, "total_steps": 21210, "loss": 0.3402, "lr": 3.218170690628214e-07, "epoch": 9.54031117397454, "percentage": 95.4, "elapsed_time": "1:28:17", "remaining_time": "0:04:15", "throughput": 3844.61, "total_tokens": 20366176}
|
| 4067 |
+
{"current_steps": 20240, "total_steps": 21210, "loss": 0.3714, "lr": 3.18535211068835e-07, "epoch": 9.542668552569543, "percentage": 95.43, "elapsed_time": "1:28:18", "remaining_time": "0:04:13", "throughput": 3844.71, "total_tokens": 20372352}
|
| 4068 |
+
{"current_steps": 20245, "total_steps": 21210, "loss": 0.3038, "lr": 3.152700656858243e-07, "epoch": 9.545025931164545, "percentage": 95.45, "elapsed_time": "1:28:19", "remaining_time": "0:04:12", "throughput": 3844.8, "total_tokens": 20377216}
|
| 4069 |
+
{"current_steps": 20250, "total_steps": 21210, "loss": 0.3052, "lr": 3.1202163512472905e-07, "epoch": 9.547383309759548, "percentage": 95.47, "elapsed_time": "1:28:21", "remaining_time": "0:04:11", "throughput": 3844.9, "total_tokens": 20382368}
|
| 4070 |
+
{"current_steps": 20255, "total_steps": 21210, "loss": 0.2772, "lr": 3.087899215851592e-07, "epoch": 9.54974068835455, "percentage": 95.5, "elapsed_time": "1:28:22", "remaining_time": "0:04:09", "throughput": 3844.98, "total_tokens": 20387072}
|
| 4071 |
+
{"current_steps": 20260, "total_steps": 21210, "loss": 0.3252, "lr": 3.055749272554198e-07, "epoch": 9.552098066949553, "percentage": 95.52, "elapsed_time": "1:28:23", "remaining_time": "0:04:08", "throughput": 3845.09, "total_tokens": 20392256}
|
| 4072 |
+
{"current_steps": 20265, "total_steps": 21210, "loss": 0.34, "lr": 3.0237665431247784e-07, "epoch": 9.554455445544555, "percentage": 95.54, "elapsed_time": "1:28:24", "remaining_time": "0:04:07", "throughput": 3845.16, "total_tokens": 20396960}
|
| 4073 |
+
{"current_steps": 20270, "total_steps": 21210, "loss": 0.3027, "lr": 2.9919510492199267e-07, "epoch": 9.556812824139557, "percentage": 95.57, "elapsed_time": "1:28:25", "remaining_time": "0:04:06", "throughput": 3845.23, "total_tokens": 20401472}
|
| 4074 |
+
{"current_steps": 20275, "total_steps": 21210, "loss": 0.3046, "lr": 2.960302812382909e-07, "epoch": 9.55917020273456, "percentage": 95.59, "elapsed_time": "1:28:26", "remaining_time": "0:04:04", "throughput": 3845.28, "total_tokens": 20406144}
|
| 4075 |
+
{"current_steps": 20280, "total_steps": 21210, "loss": 0.3186, "lr": 2.928821854043778e-07, "epoch": 9.561527581329562, "percentage": 95.62, "elapsed_time": "1:28:27", "remaining_time": "0:04:03", "throughput": 3845.34, "total_tokens": 20410816}
|
| 4076 |
+
{"current_steps": 20285, "total_steps": 21210, "loss": 0.3148, "lr": 2.8975081955192605e-07, "epoch": 9.563884959924565, "percentage": 95.64, "elapsed_time": "1:28:29", "remaining_time": "0:04:02", "throughput": 3845.44, "total_tokens": 20415712}
|
| 4077 |
+
{"current_steps": 20290, "total_steps": 21210, "loss": 0.3277, "lr": 2.8663618580128947e-07, "epoch": 9.566242338519567, "percentage": 95.66, "elapsed_time": "1:28:30", "remaining_time": "0:04:00", "throughput": 3845.52, "total_tokens": 20420704}
|
| 4078 |
+
{"current_steps": 20295, "total_steps": 21210, "loss": 0.3613, "lr": 2.8353828626148107e-07, "epoch": 9.56859971711457, "percentage": 95.69, "elapsed_time": "1:28:31", "remaining_time": "0:03:59", "throughput": 3845.6, "total_tokens": 20425504}
|
| 4079 |
+
{"current_steps": 20300, "total_steps": 21210, "loss": 0.3327, "lr": 2.80457123030195e-07, "epoch": 9.570957095709572, "percentage": 95.71, "elapsed_time": "1:28:32", "remaining_time": "0:03:58", "throughput": 3845.7, "total_tokens": 20430656}
|
| 4080 |
+
{"current_steps": 20305, "total_steps": 21210, "loss": 0.3347, "lr": 2.7739269819377633e-07, "epoch": 9.573314474304574, "percentage": 95.73, "elapsed_time": "1:28:33", "remaining_time": "0:03:56", "throughput": 3845.77, "total_tokens": 20435680}
|
| 4081 |
+
{"current_steps": 20310, "total_steps": 21210, "loss": 0.3742, "lr": 2.743450138272513e-07, "epoch": 9.575671852899575, "percentage": 95.76, "elapsed_time": "1:28:34", "remaining_time": "0:03:55", "throughput": 3845.84, "total_tokens": 20440512}
|
| 4082 |
+
{"current_steps": 20315, "total_steps": 21210, "loss": 0.3521, "lr": 2.713140719943025e-07, "epoch": 9.578029231494579, "percentage": 95.78, "elapsed_time": "1:28:36", "remaining_time": "0:03:54", "throughput": 3845.92, "total_tokens": 20445472}
|
| 4083 |
+
{"current_steps": 20320, "total_steps": 21210, "loss": 0.3109, "lr": 2.682998747472826e-07, "epoch": 9.58038661008958, "percentage": 95.8, "elapsed_time": "1:28:37", "remaining_time": "0:03:52", "throughput": 3846.06, "total_tokens": 20451232}
|
| 4084 |
+
{"current_steps": 20325, "total_steps": 21210, "loss": 0.2647, "lr": 2.653024241271951e-07, "epoch": 9.582743988684582, "percentage": 95.83, "elapsed_time": "1:28:38", "remaining_time": "0:03:51", "throughput": 3846.15, "total_tokens": 20456032}
|
| 4085 |
+
{"current_steps": 20330, "total_steps": 21210, "loss": 0.4058, "lr": 2.6232172216371086e-07, "epoch": 9.585101367279584, "percentage": 95.85, "elapsed_time": "1:28:39", "remaining_time": "0:03:50", "throughput": 3846.24, "total_tokens": 20461056}
|
| 4086 |
+
{"current_steps": 20335, "total_steps": 21210, "loss": 0.3103, "lr": 2.5935777087515987e-07, "epoch": 9.587458745874587, "percentage": 95.87, "elapsed_time": "1:28:40", "remaining_time": "0:03:48", "throughput": 3846.33, "total_tokens": 20466144}
|
| 4087 |
+
{"current_steps": 20340, "total_steps": 21210, "loss": 0.3152, "lr": 2.5641057226853116e-07, "epoch": 9.58981612446959, "percentage": 95.9, "elapsed_time": "1:28:42", "remaining_time": "0:03:47", "throughput": 3846.46, "total_tokens": 20472192}
|
| 4088 |
+
{"current_steps": 20345, "total_steps": 21210, "loss": 0.3283, "lr": 2.5348012833946445e-07, "epoch": 9.592173503064592, "percentage": 95.92, "elapsed_time": "1:28:43", "remaining_time": "0:03:46", "throughput": 3846.53, "total_tokens": 20476992}
|
| 4089 |
+
{"current_steps": 20350, "total_steps": 21210, "loss": 0.3001, "lr": 2.505664410722558e-07, "epoch": 9.594530881659594, "percentage": 95.95, "elapsed_time": "1:28:44", "remaining_time": "0:03:45", "throughput": 3846.67, "total_tokens": 20482848}
|
| 4090 |
+
{"current_steps": 20355, "total_steps": 21210, "loss": 0.3292, "lr": 2.4766951243985756e-07, "epoch": 9.596888260254596, "percentage": 95.97, "elapsed_time": "1:28:46", "remaining_time": "0:03:43", "throughput": 3846.75, "total_tokens": 20488000}
|
| 4091 |
+
{"current_steps": 20360, "total_steps": 21210, "loss": 0.3302, "lr": 2.447893444038757e-07, "epoch": 9.599245638849599, "percentage": 95.99, "elapsed_time": "1:28:47", "remaining_time": "0:03:42", "throughput": 3846.84, "total_tokens": 20492864}
|
| 4092 |
+
{"current_steps": 20365, "total_steps": 21210, "loss": 0.3472, "lr": 2.4192593891456395e-07, "epoch": 9.601603017444601, "percentage": 96.02, "elapsed_time": "1:28:48", "remaining_time": "0:03:41", "throughput": 3846.9, "total_tokens": 20497216}
|
| 4093 |
+
{"current_steps": 20370, "total_steps": 21210, "loss": 0.2974, "lr": 2.390792979108214e-07, "epoch": 9.603960396039604, "percentage": 96.04, "elapsed_time": "1:28:49", "remaining_time": "0:03:39", "throughput": 3847.0, "total_tokens": 20502560}
|
| 4094 |
+
{"current_steps": 20375, "total_steps": 21210, "loss": 0.2823, "lr": 2.362494233202034e-07, "epoch": 9.606317774634606, "percentage": 96.06, "elapsed_time": "1:28:50", "remaining_time": "0:03:38", "throughput": 3847.1, "total_tokens": 20508000}
|
| 4095 |
+
{"current_steps": 20380, "total_steps": 21210, "loss": 0.3536, "lr": 2.3343631705890766e-07, "epoch": 9.608675153229608, "percentage": 96.09, "elapsed_time": "1:28:52", "remaining_time": "0:03:37", "throughput": 3847.29, "total_tokens": 20514432}
|
| 4096 |
+
{"current_steps": 20385, "total_steps": 21210, "loss": 0.2743, "lr": 2.3063998103177998e-07, "epoch": 9.61103253182461, "percentage": 96.11, "elapsed_time": "1:28:53", "remaining_time": "0:03:35", "throughput": 3847.4, "total_tokens": 20519872}
|
| 4097 |
+
{"current_steps": 20390, "total_steps": 21210, "loss": 0.3492, "lr": 2.2786041713230565e-07, "epoch": 9.613389910419613, "percentage": 96.13, "elapsed_time": "1:28:54", "remaining_time": "0:03:34", "throughput": 3847.44, "total_tokens": 20523872}
|
| 4098 |
+
{"current_steps": 20395, "total_steps": 21210, "loss": 0.2843, "lr": 2.2509762724262085e-07, "epoch": 9.615747289014616, "percentage": 96.16, "elapsed_time": "1:28:55", "remaining_time": "0:03:33", "throughput": 3847.5, "total_tokens": 20528320}
|
| 4099 |
+
{"current_steps": 20400, "total_steps": 21210, "loss": 0.3231, "lr": 2.2235161323349573e-07, "epoch": 9.618104667609618, "percentage": 96.18, "elapsed_time": "1:28:56", "remaining_time": "0:03:31", "throughput": 3847.64, "total_tokens": 20534016}
|
| 4100 |
+
{"current_steps": 20405, "total_steps": 21210, "loss": 0.3169, "lr": 2.196223769643485e-07, "epoch": 9.62046204620462, "percentage": 96.2, "elapsed_time": "1:28:57", "remaining_time": "0:03:30", "throughput": 3847.75, "total_tokens": 20539008}
|
| 4101 |
+
{"current_steps": 20410, "total_steps": 21210, "loss": 0.3324, "lr": 2.1690992028322866e-07, "epoch": 9.622819424799623, "percentage": 96.23, "elapsed_time": "1:28:59", "remaining_time": "0:03:29", "throughput": 3847.87, "total_tokens": 20544416}
|
| 4102 |
+
{"current_steps": 20415, "total_steps": 21210, "loss": 0.3486, "lr": 2.1421424502683086e-07, "epoch": 9.625176803394625, "percentage": 96.25, "elapsed_time": "1:29:00", "remaining_time": "0:03:27", "throughput": 3847.98, "total_tokens": 20549664}
|
| 4103 |
+
{"current_steps": 20420, "total_steps": 21210, "loss": 0.3373, "lr": 2.1153535302047832e-07, "epoch": 9.627534181989628, "percentage": 96.28, "elapsed_time": "1:29:01", "remaining_time": "0:03:26", "throughput": 3848.09, "total_tokens": 20554848}
|
| 4104 |
+
{"current_steps": 20425, "total_steps": 21210, "loss": 0.3206, "lr": 2.0887324607813952e-07, "epoch": 9.62989156058463, "percentage": 96.3, "elapsed_time": "1:29:02", "remaining_time": "0:03:25", "throughput": 3848.16, "total_tokens": 20559840}
|
| 4105 |
+
{"current_steps": 20430, "total_steps": 21210, "loss": 0.3144, "lr": 2.0622792600241135e-07, "epoch": 9.632248939179632, "percentage": 96.32, "elapsed_time": "1:29:03", "remaining_time": "0:03:24", "throughput": 3848.2, "total_tokens": 20563840}
|
| 4106 |
+
{"current_steps": 20435, "total_steps": 21210, "loss": 0.3151, "lr": 2.0359939458452216e-07, "epoch": 9.634606317774635, "percentage": 96.35, "elapsed_time": "1:29:04", "remaining_time": "0:03:22", "throughput": 3848.25, "total_tokens": 20568448}
|
| 4107 |
+
{"current_steps": 20440, "total_steps": 21210, "loss": 0.3421, "lr": 2.0098765360433703e-07, "epoch": 9.636963696369637, "percentage": 96.37, "elapsed_time": "1:29:06", "remaining_time": "0:03:21", "throughput": 3848.36, "total_tokens": 20573824}
|
| 4108 |
+
{"current_steps": 20445, "total_steps": 21210, "loss": 0.3247, "lr": 1.9839270483034966e-07, "epoch": 9.63932107496464, "percentage": 96.39, "elapsed_time": "1:29:07", "remaining_time": "0:03:20", "throughput": 3848.41, "total_tokens": 20577984}
|
| 4109 |
+
{"current_steps": 20450, "total_steps": 21210, "loss": 0.2998, "lr": 1.9581455001968506e-07, "epoch": 9.641678453559642, "percentage": 96.42, "elapsed_time": "1:29:08", "remaining_time": "0:03:18", "throughput": 3848.53, "total_tokens": 20583520}
|
| 4110 |
+
{"current_steps": 20455, "total_steps": 21210, "loss": 0.3139, "lr": 1.9325319091808847e-07, "epoch": 9.644035832154644, "percentage": 96.44, "elapsed_time": "1:29:09", "remaining_time": "0:03:17", "throughput": 3848.59, "total_tokens": 20587840}
|
| 4111 |
+
{"current_steps": 20460, "total_steps": 21210, "loss": 0.338, "lr": 1.9070862925994194e-07, "epoch": 9.646393210749647, "percentage": 96.46, "elapsed_time": "1:29:10", "remaining_time": "0:03:16", "throughput": 3848.68, "total_tokens": 20592480}
|
| 4112 |
+
{"current_steps": 20465, "total_steps": 21210, "loss": 0.3435, "lr": 1.8818086676825052e-07, "epoch": 9.64875058934465, "percentage": 96.49, "elapsed_time": "1:29:11", "remaining_time": "0:03:14", "throughput": 3848.71, "total_tokens": 20596096}
|
| 4113 |
+
{"current_steps": 20470, "total_steps": 21210, "loss": 0.2951, "lr": 1.8566990515464232e-07, "epoch": 9.651107967939652, "percentage": 96.51, "elapsed_time": "1:29:12", "remaining_time": "0:03:13", "throughput": 3848.81, "total_tokens": 20600928}
|
| 4114 |
+
{"current_steps": 20475, "total_steps": 21210, "loss": 0.3601, "lr": 1.8317574611936839e-07, "epoch": 9.653465346534654, "percentage": 96.53, "elapsed_time": "1:29:13", "remaining_time": "0:03:12", "throughput": 3848.86, "total_tokens": 20605024}
|
| 4115 |
+
{"current_steps": 20480, "total_steps": 21210, "loss": 0.3254, "lr": 1.8069839135130827e-07, "epoch": 9.655822725129656, "percentage": 96.56, "elapsed_time": "1:29:14", "remaining_time": "0:03:10", "throughput": 3848.97, "total_tokens": 20610560}
|
| 4116 |
+
{"current_steps": 20485, "total_steps": 21210, "loss": 0.2418, "lr": 1.7823784252795073e-07, "epoch": 9.658180103724659, "percentage": 96.58, "elapsed_time": "1:29:16", "remaining_time": "0:03:09", "throughput": 3849.1, "total_tokens": 20616608}
|
| 4117 |
+
{"current_steps": 20490, "total_steps": 21210, "loss": 0.3721, "lr": 1.757941013154213e-07, "epoch": 9.660537482319661, "percentage": 96.61, "elapsed_time": "1:29:17", "remaining_time": "0:03:08", "throughput": 3849.24, "total_tokens": 20622240}
|
| 4118 |
+
{"current_steps": 20495, "total_steps": 21210, "loss": 0.3318, "lr": 1.733671693684491e-07, "epoch": 9.662894860914664, "percentage": 96.63, "elapsed_time": "1:29:18", "remaining_time": "0:03:06", "throughput": 3849.34, "total_tokens": 20627584}
|
| 4119 |
+
{"current_steps": 20500, "total_steps": 21210, "loss": 0.2861, "lr": 1.7095704833038907e-07, "epoch": 9.665252239509666, "percentage": 96.65, "elapsed_time": "1:29:19", "remaining_time": "0:03:05", "throughput": 3849.41, "total_tokens": 20632480}
|
| 4120 |
+
{"current_steps": 20505, "total_steps": 21210, "loss": 0.3244, "lr": 1.685637398332135e-07, "epoch": 9.667609618104667, "percentage": 96.68, "elapsed_time": "1:29:21", "remaining_time": "0:03:04", "throughput": 3849.5, "total_tokens": 20637408}
|
| 4121 |
+
{"current_steps": 20510, "total_steps": 21210, "loss": 0.2909, "lr": 1.6618724549750387e-07, "epoch": 9.66996699669967, "percentage": 96.7, "elapsed_time": "1:29:22", "remaining_time": "0:03:03", "throughput": 3849.66, "total_tokens": 20643456}
|
| 4122 |
+
{"current_steps": 20515, "total_steps": 21210, "loss": 0.3467, "lr": 1.638275669324646e-07, "epoch": 9.672324375294671, "percentage": 96.72, "elapsed_time": "1:29:23", "remaining_time": "0:03:01", "throughput": 3849.73, "total_tokens": 20648224}
|
| 4123 |
+
{"current_steps": 20520, "total_steps": 21210, "loss": 0.3343, "lr": 1.6148470573590925e-07, "epoch": 9.674681753889674, "percentage": 96.75, "elapsed_time": "1:29:24", "remaining_time": "0:03:00", "throughput": 3849.85, "total_tokens": 20653824}
|
| 4124 |
+
{"current_steps": 20525, "total_steps": 21210, "loss": 0.3187, "lr": 1.5915866349426323e-07, "epoch": 9.677039132484676, "percentage": 96.77, "elapsed_time": "1:29:25", "remaining_time": "0:02:59", "throughput": 3849.9, "total_tokens": 20658048}
|
| 4125 |
+
{"current_steps": 20530, "total_steps": 21210, "loss": 0.2712, "lr": 1.5684944178256388e-07, "epoch": 9.679396511079679, "percentage": 96.79, "elapsed_time": "1:29:27", "remaining_time": "0:02:57", "throughput": 3850.01, "total_tokens": 20663744}
|
| 4126 |
+
{"current_steps": 20535, "total_steps": 21210, "loss": 0.3526, "lr": 1.5455704216446044e-07, "epoch": 9.681753889674681, "percentage": 96.82, "elapsed_time": "1:29:28", "remaining_time": "0:02:56", "throughput": 3850.12, "total_tokens": 20668992}
|
| 4127 |
+
{"current_steps": 20540, "total_steps": 21210, "loss": 0.3376, "lr": 1.522814661922084e-07, "epoch": 9.684111268269683, "percentage": 96.84, "elapsed_time": "1:29:29", "remaining_time": "0:02:55", "throughput": 3850.2, "total_tokens": 20673632}
|
| 4128 |
+
{"current_steps": 20545, "total_steps": 21210, "loss": 0.3323, "lr": 1.5002271540667523e-07, "epoch": 9.686468646864686, "percentage": 96.86, "elapsed_time": "1:29:30", "remaining_time": "0:02:53", "throughput": 3850.3, "total_tokens": 20678912}
|
| 4129 |
+
{"current_steps": 20550, "total_steps": 21210, "loss": 0.226, "lr": 1.4778079133733468e-07, "epoch": 9.688826025459688, "percentage": 96.89, "elapsed_time": "1:29:31", "remaining_time": "0:02:52", "throughput": 3850.38, "total_tokens": 20683680}
|
| 4130 |
+
{"current_steps": 20555, "total_steps": 21210, "loss": 0.324, "lr": 1.4555569550226133e-07, "epoch": 9.69118340405469, "percentage": 96.91, "elapsed_time": "1:29:32", "remaining_time": "0:02:51", "throughput": 3850.46, "total_tokens": 20688128}
|
| 4131 |
+
{"current_steps": 20560, "total_steps": 21210, "loss": 0.3399, "lr": 1.4334742940814162e-07, "epoch": 9.693540782649693, "percentage": 96.94, "elapsed_time": "1:29:34", "remaining_time": "0:02:49", "throughput": 3850.6, "total_tokens": 20693856}
|
| 4132 |
+
{"current_steps": 20565, "total_steps": 21210, "loss": 0.3378, "lr": 1.4115599455026273e-07, "epoch": 9.695898161244696, "percentage": 96.96, "elapsed_time": "1:29:35", "remaining_time": "0:02:48", "throughput": 3850.67, "total_tokens": 20698688}
|
| 4133 |
+
{"current_steps": 20570, "total_steps": 21210, "loss": 0.2956, "lr": 1.389813924125155e-07, "epoch": 9.698255539839698, "percentage": 96.98, "elapsed_time": "1:29:36", "remaining_time": "0:02:47", "throughput": 3850.76, "total_tokens": 20703840}
|
| 4134 |
+
{"current_steps": 20575, "total_steps": 21210, "loss": 0.2751, "lr": 1.368236244673915e-07, "epoch": 9.7006129184347, "percentage": 97.01, "elapsed_time": "1:29:37", "remaining_time": "0:02:45", "throughput": 3850.81, "total_tokens": 20708576}
|
| 4135 |
+
{"current_steps": 20580, "total_steps": 21210, "loss": 0.3192, "lr": 1.3468269217598585e-07, "epoch": 9.702970297029703, "percentage": 97.03, "elapsed_time": "1:29:38", "remaining_time": "0:02:44", "throughput": 3850.88, "total_tokens": 20713120}
|
| 4136 |
+
{"current_steps": 20585, "total_steps": 21210, "loss": 0.3197, "lr": 1.3255859698799168e-07, "epoch": 9.705327675624705, "percentage": 97.05, "elapsed_time": "1:29:39", "remaining_time": "0:02:43", "throughput": 3850.94, "total_tokens": 20717984}
|
| 4137 |
+
{"current_steps": 20590, "total_steps": 21210, "loss": 0.3116, "lr": 1.304513403417029e-07, "epoch": 9.707685054219708, "percentage": 97.08, "elapsed_time": "1:29:41", "remaining_time": "0:02:42", "throughput": 3851.05, "total_tokens": 20723648}
|
| 4138 |
+
{"current_steps": 20595, "total_steps": 21210, "loss": 0.3243, "lr": 1.283609236640143e-07, "epoch": 9.71004243281471, "percentage": 97.1, "elapsed_time": "1:29:42", "remaining_time": "0:02:40", "throughput": 3851.09, "total_tokens": 20728000}
|
| 4139 |
+
{"current_steps": 20600, "total_steps": 21210, "loss": 0.316, "lr": 1.262873483704047e-07, "epoch": 9.712399811409712, "percentage": 97.12, "elapsed_time": "1:29:43", "remaining_time": "0:02:39", "throughput": 3851.19, "total_tokens": 20733152}
|
| 4140 |
+
{"current_steps": 20605, "total_steps": 21210, "loss": 0.3429, "lr": 1.2423061586496477e-07, "epoch": 9.714757190004715, "percentage": 97.15, "elapsed_time": "1:29:44", "remaining_time": "0:02:38", "throughput": 3851.3, "total_tokens": 20738944}
|
| 4141 |
+
{"current_steps": 20610, "total_steps": 21210, "loss": 0.3153, "lr": 1.221907275403722e-07, "epoch": 9.717114568599717, "percentage": 97.17, "elapsed_time": "1:29:46", "remaining_time": "0:02:36", "throughput": 3851.4, "total_tokens": 20744192}
|
| 4142 |
+
{"current_steps": 20615, "total_steps": 21210, "loss": 0.3375, "lr": 1.201676847779054e-07, "epoch": 9.71947194719472, "percentage": 97.19, "elapsed_time": "1:29:47", "remaining_time": "0:02:35", "throughput": 3851.48, "total_tokens": 20748864}
|
| 4143 |
+
{"current_steps": 20620, "total_steps": 21210, "loss": 0.29, "lr": 1.1816148894742418e-07, "epoch": 9.721829325789722, "percentage": 97.22, "elapsed_time": "1:29:48", "remaining_time": "0:02:34", "throughput": 3851.56, "total_tokens": 20754144}
|
| 4144 |
+
{"current_steps": 20625, "total_steps": 21210, "loss": 0.309, "lr": 1.1617214140738908e-07, "epoch": 9.724186704384724, "percentage": 97.24, "elapsed_time": "1:29:49", "remaining_time": "0:02:32", "throughput": 3851.64, "total_tokens": 20758592}
|
| 4145 |
+
{"current_steps": 20630, "total_steps": 21210, "loss": 0.3538, "lr": 1.141996435048559e-07, "epoch": 9.726544082979727, "percentage": 97.27, "elapsed_time": "1:29:50", "remaining_time": "0:02:31", "throughput": 3851.73, "total_tokens": 20763520}
|
| 4146 |
+
{"current_steps": 20635, "total_steps": 21210, "loss": 0.3489, "lr": 1.1224399657546458e-07, "epoch": 9.72890146157473, "percentage": 97.29, "elapsed_time": "1:29:51", "remaining_time": "0:02:30", "throughput": 3851.82, "total_tokens": 20768480}
|
| 4147 |
+
{"current_steps": 20640, "total_steps": 21210, "loss": 0.2798, "lr": 1.1030520194344473e-07, "epoch": 9.731258840169732, "percentage": 97.31, "elapsed_time": "1:29:53", "remaining_time": "0:02:28", "throughput": 3851.93, "total_tokens": 20773760}
|
| 4148 |
+
{"current_steps": 20645, "total_steps": 21210, "loss": 0.3388, "lr": 1.0838326092161844e-07, "epoch": 9.733616218764734, "percentage": 97.34, "elapsed_time": "1:29:54", "remaining_time": "0:02:27", "throughput": 3851.96, "total_tokens": 20777696}
|
| 4149 |
+
{"current_steps": 20650, "total_steps": 21210, "loss": 0.283, "lr": 1.064781748113891e-07, "epoch": 9.735973597359736, "percentage": 97.36, "elapsed_time": "1:29:55", "remaining_time": "0:02:26", "throughput": 3852.0, "total_tokens": 20781728}
|
| 4150 |
+
{"current_steps": 20655, "total_steps": 21210, "loss": 0.3663, "lr": 1.0458994490275543e-07, "epoch": 9.738330975954739, "percentage": 97.38, "elapsed_time": "1:29:56", "remaining_time": "0:02:24", "throughput": 3852.12, "total_tokens": 20787200}
|
| 4151 |
+
{"current_steps": 20660, "total_steps": 21210, "loss": 0.3827, "lr": 1.0271857247430017e-07, "epoch": 9.740688354549741, "percentage": 97.41, "elapsed_time": "1:29:57", "remaining_time": "0:02:23", "throughput": 3852.17, "total_tokens": 20791296}
|
| 4152 |
+
{"current_steps": 20665, "total_steps": 21210, "loss": 0.3355, "lr": 1.0086405879318473e-07, "epoch": 9.743045733144744, "percentage": 97.43, "elapsed_time": "1:29:58", "remaining_time": "0:02:22", "throughput": 3852.23, "total_tokens": 20795456}
|
| 4153 |
+
{"current_steps": 20670, "total_steps": 21210, "loss": 0.3713, "lr": 9.902640511516292e-08, "epoch": 9.745403111739746, "percentage": 97.45, "elapsed_time": "1:29:59", "remaining_time": "0:02:21", "throughput": 3852.27, "total_tokens": 20799648}
|
| 4154 |
+
{"current_steps": 20675, "total_steps": 21210, "loss": 0.3354, "lr": 9.720561268456718e-08, "epoch": 9.747760490334748, "percentage": 97.48, "elapsed_time": "1:30:00", "remaining_time": "0:02:19", "throughput": 3852.34, "total_tokens": 20804480}
|
| 4155 |
+
{"current_steps": 20680, "total_steps": 21210, "loss": 0.3736, "lr": 9.540168273431682e-08, "epoch": 9.75011786892975, "percentage": 97.5, "elapsed_time": "1:30:01", "remaining_time": "0:02:18", "throughput": 3852.44, "total_tokens": 20810016}
|
| 4156 |
+
{"current_steps": 20685, "total_steps": 21210, "loss": 0.3482, "lr": 9.361461648590697e-08, "epoch": 9.752475247524753, "percentage": 97.52, "elapsed_time": "1:30:02", "remaining_time": "0:02:17", "throughput": 3852.52, "total_tokens": 20814976}
|
| 4157 |
+
{"current_steps": 20690, "total_steps": 21210, "loss": 0.337, "lr": 9.184441514942243e-08, "epoch": 9.754832626119756, "percentage": 97.55, "elapsed_time": "1:30:03", "remaining_time": "0:02:15", "throughput": 3852.57, "total_tokens": 20819104}
|
| 4158 |
+
{"current_steps": 20695, "total_steps": 21210, "loss": 0.3462, "lr": 9.009107992351828e-08, "epoch": 9.757190004714758, "percentage": 97.57, "elapsed_time": "1:30:05", "remaining_time": "0:02:14", "throughput": 3852.66, "total_tokens": 20823904}
|
| 4159 |
+
{"current_steps": 20700, "total_steps": 21210, "loss": 0.3573, "lr": 8.835461199543649e-08, "epoch": 9.75954738330976, "percentage": 97.6, "elapsed_time": "1:30:06", "remaining_time": "0:02:13", "throughput": 3852.81, "total_tokens": 20830048}
|
| 4160 |
+
{"current_steps": 20705, "total_steps": 21210, "loss": 0.3844, "lr": 8.66350125409976e-08, "epoch": 9.761904761904763, "percentage": 97.62, "elapsed_time": "1:30:07", "remaining_time": "0:02:11", "throughput": 3852.89, "total_tokens": 20834688}
|
| 4161 |
+
{"current_steps": 20710, "total_steps": 21210, "loss": 0.3647, "lr": 8.493228272459242e-08, "epoch": 9.764262140499763, "percentage": 97.64, "elapsed_time": "1:30:08", "remaining_time": "0:02:10", "throughput": 3852.95, "total_tokens": 20839040}
|
| 4162 |
+
{"current_steps": 20715, "total_steps": 21210, "loss": 0.2956, "lr": 8.324642369919588e-08, "epoch": 9.766619519094768, "percentage": 97.67, "elapsed_time": "1:30:09", "remaining_time": "0:02:09", "throughput": 3853.03, "total_tokens": 20843840}
|
| 4163 |
+
{"current_steps": 20720, "total_steps": 21210, "loss": 0.3413, "lr": 8.157743660635875e-08, "epoch": 9.768976897689768, "percentage": 97.69, "elapsed_time": "1:30:10", "remaining_time": "0:02:07", "throughput": 3853.08, "total_tokens": 20848512}
|
| 4164 |
+
{"current_steps": 20725, "total_steps": 21210, "loss": 0.3101, "lr": 7.992532257620478e-08, "epoch": 9.77133427628477, "percentage": 97.71, "elapsed_time": "1:30:11", "remaining_time": "0:02:06", "throughput": 3853.16, "total_tokens": 20853120}
|
| 4165 |
+
{"current_steps": 20730, "total_steps": 21210, "loss": 0.3208, "lr": 7.829008272743077e-08, "epoch": 9.773691654879773, "percentage": 97.74, "elapsed_time": "1:30:13", "remaining_time": "0:02:05", "throughput": 3853.23, "total_tokens": 20857536}
|
| 4166 |
+
{"current_steps": 20735, "total_steps": 21210, "loss": 0.3203, "lr": 7.667171816731489e-08, "epoch": 9.776049033474775, "percentage": 97.76, "elapsed_time": "1:30:14", "remaining_time": "0:02:04", "throughput": 3853.33, "total_tokens": 20862688}
|
| 4167 |
+
{"current_steps": 20740, "total_steps": 21210, "loss": 0.346, "lr": 7.507022999169999e-08, "epoch": 9.778406412069778, "percentage": 97.78, "elapsed_time": "1:30:15", "remaining_time": "0:02:02", "throughput": 3853.43, "total_tokens": 20867712}
|
| 4168 |
+
{"current_steps": 20745, "total_steps": 21210, "loss": 0.3177, "lr": 7.348561928500752e-08, "epoch": 9.78076379066478, "percentage": 97.81, "elapsed_time": "1:30:16", "remaining_time": "0:02:01", "throughput": 3853.49, "total_tokens": 20872544}
|
| 4169 |
+
{"current_steps": 20750, "total_steps": 21210, "loss": 0.2969, "lr": 7.19178871202264e-08, "epoch": 9.783121169259783, "percentage": 97.83, "elapsed_time": "1:30:17", "remaining_time": "0:02:00", "throughput": 3853.57, "total_tokens": 20877504}
|
| 4170 |
+
{"current_steps": 20755, "total_steps": 21210, "loss": 0.294, "lr": 7.036703455891858e-08, "epoch": 9.785478547854785, "percentage": 97.85, "elapsed_time": "1:30:18", "remaining_time": "0:01:58", "throughput": 3853.67, "total_tokens": 20882656}
|
| 4171 |
+
{"current_steps": 20760, "total_steps": 21210, "loss": 0.337, "lr": 6.883306265121625e-08, "epoch": 9.787835926449787, "percentage": 97.88, "elapsed_time": "1:30:19", "remaining_time": "0:01:57", "throughput": 3853.75, "total_tokens": 20887136}
|
| 4172 |
+
{"current_steps": 20765, "total_steps": 21210, "loss": 0.3283, "lr": 6.731597243581911e-08, "epoch": 9.79019330504479, "percentage": 97.9, "elapsed_time": "1:30:20", "remaining_time": "0:01:56", "throughput": 3853.82, "total_tokens": 20891488}
|
| 4173 |
+
{"current_steps": 20770, "total_steps": 21210, "loss": 0.3423, "lr": 6.581576494000264e-08, "epoch": 9.792550683639792, "percentage": 97.93, "elapsed_time": "1:30:22", "remaining_time": "0:01:54", "throughput": 3853.94, "total_tokens": 20896992}
|
| 4174 |
+
{"current_steps": 20775, "total_steps": 21210, "loss": 0.3464, "lr": 6.43324411795987e-08, "epoch": 9.794908062234795, "percentage": 97.95, "elapsed_time": "1:30:23", "remaining_time": "0:01:53", "throughput": 3854.0, "total_tokens": 20901504}
|
| 4175 |
+
{"current_steps": 20780, "total_steps": 21210, "loss": 0.3223, "lr": 6.286600215902049e-08, "epoch": 9.797265440829797, "percentage": 97.97, "elapsed_time": "1:30:24", "remaining_time": "0:01:52", "throughput": 3854.11, "total_tokens": 20906880}
|
| 4176 |
+
{"current_steps": 20785, "total_steps": 21210, "loss": 0.351, "lr": 6.141644887123487e-08, "epoch": 9.7996228194248, "percentage": 98.0, "elapsed_time": "1:30:25", "remaining_time": "0:01:50", "throughput": 3854.2, "total_tokens": 20911776}
|
| 4177 |
+
{"current_steps": 20790, "total_steps": 21210, "loss": 0.3694, "lr": 5.998378229778446e-08, "epoch": 9.801980198019802, "percentage": 98.02, "elapsed_time": "1:30:26", "remaining_time": "0:01:49", "throughput": 3854.32, "total_tokens": 20917376}
|
| 4178 |
+
{"current_steps": 20795, "total_steps": 21210, "loss": 0.389, "lr": 5.8568003408770996e-08, "epoch": 9.804337576614804, "percentage": 98.04, "elapsed_time": "1:30:28", "remaining_time": "0:01:48", "throughput": 3854.4, "total_tokens": 20922048}
|
| 4179 |
+
{"current_steps": 20800, "total_steps": 21210, "loss": 0.33, "lr": 5.716911316286655e-08, "epoch": 9.806694955209807, "percentage": 98.07, "elapsed_time": "1:30:29", "remaining_time": "0:01:47", "throughput": 3854.49, "total_tokens": 20927008}
|
| 4180 |
+
{"current_steps": 20805, "total_steps": 21210, "loss": 0.3283, "lr": 5.578711250730506e-08, "epoch": 9.809052333804809, "percentage": 98.09, "elapsed_time": "1:30:30", "remaining_time": "0:01:45", "throughput": 3854.53, "total_tokens": 20930944}
|
| 4181 |
+
{"current_steps": 20810, "total_steps": 21210, "loss": 0.3366, "lr": 5.4422002377879645e-08, "epoch": 9.811409712399811, "percentage": 98.11, "elapsed_time": "1:30:31", "remaining_time": "0:01:44", "throughput": 3854.6, "total_tokens": 20935296}
|
| 4182 |
+
{"current_steps": 20815, "total_steps": 21210, "loss": 0.2694, "lr": 5.3073783698950885e-08, "epoch": 9.813767090994814, "percentage": 98.14, "elapsed_time": "1:30:32", "remaining_time": "0:01:43", "throughput": 3854.68, "total_tokens": 20940064}
|
| 4183 |
+
{"current_steps": 20820, "total_steps": 21210, "loss": 0.361, "lr": 5.174245738344408e-08, "epoch": 9.816124469589816, "percentage": 98.16, "elapsed_time": "1:30:33", "remaining_time": "0:01:41", "throughput": 3854.71, "total_tokens": 20944064}
|
| 4184 |
+
{"current_steps": 20825, "total_steps": 21210, "loss": 0.3211, "lr": 5.042802433283811e-08, "epoch": 9.818481848184819, "percentage": 98.18, "elapsed_time": "1:30:34", "remaining_time": "0:01:40", "throughput": 3854.78, "total_tokens": 20948672}
|
| 4185 |
+
{"current_steps": 20830, "total_steps": 21210, "loss": 0.3126, "lr": 4.91304854371849e-08, "epoch": 9.820839226779821, "percentage": 98.21, "elapsed_time": "1:30:35", "remaining_time": "0:01:39", "throughput": 3854.86, "total_tokens": 20953696}
|
| 4186 |
+
{"current_steps": 20835, "total_steps": 21210, "loss": 0.3496, "lr": 4.784984157508166e-08, "epoch": 9.823196605374823, "percentage": 98.23, "elapsed_time": "1:30:36", "remaining_time": "0:01:37", "throughput": 3854.96, "total_tokens": 20959296}
|
| 4187 |
+
{"current_steps": 20840, "total_steps": 21210, "loss": 0.3404, "lr": 4.658609361369859e-08, "epoch": 9.825553983969826, "percentage": 98.26, "elapsed_time": "1:30:38", "remaining_time": "0:01:36", "throughput": 3855.05, "total_tokens": 20964480}
|
| 4188 |
+
{"current_steps": 20845, "total_steps": 21210, "loss": 0.3871, "lr": 4.533924240875953e-08, "epoch": 9.827911362564828, "percentage": 98.28, "elapsed_time": "1:30:39", "remaining_time": "0:01:35", "throughput": 3855.13, "total_tokens": 20969376}
|
| 4189 |
+
{"current_steps": 20850, "total_steps": 21210, "loss": 0.3445, "lr": 4.410928880454468e-08, "epoch": 9.83026874115983, "percentage": 98.3, "elapsed_time": "1:30:40", "remaining_time": "0:01:33", "throughput": 3855.24, "total_tokens": 20975648}
|
| 4190 |
+
{"current_steps": 20855, "total_steps": 21210, "loss": 0.3423, "lr": 4.2896233633896165e-08, "epoch": 9.832626119754833, "percentage": 98.33, "elapsed_time": "1:30:42", "remaining_time": "0:01:32", "throughput": 3855.37, "total_tokens": 20981632}
|
| 4191 |
+
{"current_steps": 20860, "total_steps": 21210, "loss": 0.2868, "lr": 4.170007771821527e-08, "epoch": 9.834983498349835, "percentage": 98.35, "elapsed_time": "1:30:43", "remaining_time": "0:01:31", "throughput": 3855.51, "total_tokens": 20987584}
|
| 4192 |
+
{"current_steps": 20865, "total_steps": 21210, "loss": 0.3531, "lr": 4.052082186745409e-08, "epoch": 9.837340876944838, "percentage": 98.37, "elapsed_time": "1:30:44", "remaining_time": "0:01:30", "throughput": 3855.58, "total_tokens": 20992256}
|
| 4193 |
+
{"current_steps": 20870, "total_steps": 21210, "loss": 0.3414, "lr": 3.9358466880126674e-08, "epoch": 9.83969825553984, "percentage": 98.4, "elapsed_time": "1:30:45", "remaining_time": "0:01:28", "throughput": 3855.69, "total_tokens": 20997504}
|
| 4194 |
+
{"current_steps": 20875, "total_steps": 21210, "loss": 0.333, "lr": 3.821301354329787e-08, "epoch": 9.842055634134843, "percentage": 98.42, "elapsed_time": "1:30:47", "remaining_time": "0:01:27", "throughput": 3855.78, "total_tokens": 21002752}
|
| 4195 |
+
{"current_steps": 20880, "total_steps": 21210, "loss": 0.2725, "lr": 3.7084462632594465e-08, "epoch": 9.844413012729845, "percentage": 98.44, "elapsed_time": "1:30:48", "remaining_time": "0:01:26", "throughput": 3855.91, "total_tokens": 21008608}
|
| 4196 |
+
{"current_steps": 20885, "total_steps": 21210, "loss": 0.3262, "lr": 3.597281491219129e-08, "epoch": 9.846770391324847, "percentage": 98.47, "elapsed_time": "1:30:49", "remaining_time": "0:01:24", "throughput": 3855.97, "total_tokens": 21012928}
|
| 4197 |
+
{"current_steps": 20890, "total_steps": 21210, "loss": 0.3604, "lr": 3.487807113482511e-08, "epoch": 9.84912776991985, "percentage": 98.49, "elapsed_time": "1:30:50", "remaining_time": "0:01:23", "throughput": 3856.06, "total_tokens": 21018080}
|
| 4198 |
+
{"current_steps": 20895, "total_steps": 21210, "loss": 0.3134, "lr": 3.3800232041777954e-08, "epoch": 9.851485148514852, "percentage": 98.51, "elapsed_time": "1:30:51", "remaining_time": "0:01:22", "throughput": 3856.11, "total_tokens": 21022112}
|
| 4199 |
+
{"current_steps": 20900, "total_steps": 21210, "loss": 0.3121, "lr": 3.2739298362888246e-08, "epoch": 9.853842527109855, "percentage": 98.54, "elapsed_time": "1:30:52", "remaining_time": "0:01:20", "throughput": 3856.22, "total_tokens": 21027712}
|
| 4200 |
+
{"current_steps": 20905, "total_steps": 21210, "loss": 0.3572, "lr": 3.1695270816553546e-08, "epoch": 9.856199905704855, "percentage": 98.56, "elapsed_time": "1:30:54", "remaining_time": "0:01:19", "throughput": 3856.28, "total_tokens": 21032384}
|
| 4201 |
+
{"current_steps": 20910, "total_steps": 21210, "loss": 0.348, "lr": 3.066815010971391e-08, "epoch": 9.85855728429986, "percentage": 98.59, "elapsed_time": "1:30:55", "remaining_time": "0:01:18", "throughput": 3856.39, "total_tokens": 21037824}
|
| 4202 |
+
{"current_steps": 20915, "total_steps": 21210, "loss": 0.2702, "lr": 2.9657936937865782e-08, "epoch": 9.86091466289486, "percentage": 98.61, "elapsed_time": "1:30:56", "remaining_time": "0:01:16", "throughput": 3856.48, "total_tokens": 21042816}
|
| 4203 |
+
{"current_steps": 20920, "total_steps": 21210, "loss": 0.2773, "lr": 2.86646319850592e-08, "epoch": 9.863272041489862, "percentage": 98.63, "elapsed_time": "1:30:57", "remaining_time": "0:01:15", "throughput": 3856.56, "total_tokens": 21047520}
|
| 4204 |
+
{"current_steps": 20925, "total_steps": 21210, "loss": 0.3455, "lr": 2.768823592389225e-08, "epoch": 9.865629420084865, "percentage": 98.66, "elapsed_time": "1:30:58", "remaining_time": "0:01:14", "throughput": 3856.65, "total_tokens": 21052512}
|
| 4205 |
+
{"current_steps": 20930, "total_steps": 21210, "loss": 0.3351, "lr": 2.6728749415511066e-08, "epoch": 9.867986798679867, "percentage": 98.68, "elapsed_time": "1:31:00", "remaining_time": "0:01:13", "throughput": 3856.75, "total_tokens": 21057952}
|
| 4206 |
+
{"current_steps": 20935, "total_steps": 21210, "loss": 0.3334, "lr": 2.5786173109620948e-08, "epoch": 9.87034417727487, "percentage": 98.7, "elapsed_time": "1:31:01", "remaining_time": "0:01:11", "throughput": 3856.86, "total_tokens": 21063200}
|
| 4207 |
+
{"current_steps": 20940, "total_steps": 21210, "loss": 0.301, "lr": 2.4860507644464126e-08, "epoch": 9.872701555869872, "percentage": 98.73, "elapsed_time": "1:31:02", "remaining_time": "0:01:10", "throughput": 3856.98, "total_tokens": 21069024}
|
| 4208 |
+
{"current_steps": 20945, "total_steps": 21210, "loss": 0.299, "lr": 2.395175364684199e-08, "epoch": 9.875058934464874, "percentage": 98.75, "elapsed_time": "1:31:03", "remaining_time": "0:01:09", "throughput": 3857.01, "total_tokens": 21073408}
|
| 4209 |
+
{"current_steps": 20950, "total_steps": 21210, "loss": 0.3514, "lr": 2.305991173209843e-08, "epoch": 9.877416313059877, "percentage": 98.77, "elapsed_time": "1:31:04", "remaining_time": "0:01:07", "throughput": 3857.1, "total_tokens": 21078688}
|
| 4210 |
+
{"current_steps": 20955, "total_steps": 21210, "loss": 0.3276, "lr": 2.2184982504130926e-08, "epoch": 9.87977369165488, "percentage": 98.8, "elapsed_time": "1:31:05", "remaining_time": "0:01:06", "throughput": 3857.16, "total_tokens": 21082912}
|
| 4211 |
+
{"current_steps": 20960, "total_steps": 21210, "loss": 0.2838, "lr": 2.1326966555379468e-08, "epoch": 9.882131070249882, "percentage": 98.82, "elapsed_time": "1:31:07", "remaining_time": "0:01:05", "throughput": 3857.25, "total_tokens": 21088192}
|
| 4212 |
+
{"current_steps": 20965, "total_steps": 21210, "loss": 0.3673, "lr": 2.0485864466837645e-08, "epoch": 9.884488448844884, "percentage": 98.84, "elapsed_time": "1:31:08", "remaining_time": "0:01:03", "throughput": 3857.31, "total_tokens": 21092768}
|
| 4213 |
+
{"current_steps": 20970, "total_steps": 21210, "loss": 0.3534, "lr": 1.9661676808038763e-08, "epoch": 9.886845827439886, "percentage": 98.87, "elapsed_time": "1:31:09", "remaining_time": "0:01:02", "throughput": 3857.38, "total_tokens": 21097888}
|
| 4214 |
+
{"current_steps": 20975, "total_steps": 21210, "loss": 0.3053, "lr": 1.8854404137069738e-08, "epoch": 9.889203206034889, "percentage": 98.89, "elapsed_time": "1:31:10", "remaining_time": "0:01:01", "throughput": 3857.45, "total_tokens": 21102400}
|
| 4215 |
+
{"current_steps": 20980, "total_steps": 21210, "loss": 0.2478, "lr": 1.8064047000557193e-08, "epoch": 9.891560584629891, "percentage": 98.92, "elapsed_time": "1:31:11", "remaining_time": "0:00:59", "throughput": 3857.58, "total_tokens": 21108160}
|
| 4216 |
+
{"current_steps": 20985, "total_steps": 21210, "loss": 0.2866, "lr": 1.729060593368137e-08, "epoch": 9.893917963224894, "percentage": 98.94, "elapsed_time": "1:31:13", "remaining_time": "0:00:58", "throughput": 3857.67, "total_tokens": 21113184}
|
| 4217 |
+
{"current_steps": 20990, "total_steps": 21210, "loss": 0.3231, "lr": 1.653408146016222e-08, "epoch": 9.896275341819896, "percentage": 98.96, "elapsed_time": "1:31:14", "remaining_time": "0:00:57", "throughput": 3857.75, "total_tokens": 21118208}
|
| 4218 |
+
{"current_steps": 20995, "total_steps": 21210, "loss": 0.3198, "lr": 1.5794474092267753e-08, "epoch": 9.898632720414899, "percentage": 98.99, "elapsed_time": "1:31:15", "remaining_time": "0:00:56", "throughput": 3857.83, "total_tokens": 21123488}
|
| 4219 |
+
{"current_steps": 21000, "total_steps": 21210, "loss": 0.2892, "lr": 1.507178433080847e-08, "epoch": 9.900990099009901, "percentage": 99.01, "elapsed_time": "1:31:16", "remaining_time": "0:00:54", "throughput": 3857.92, "total_tokens": 21128800}
|
| 4220 |
+
{"current_steps": 21005, "total_steps": 21210, "loss": 0.2661, "lr": 1.4366012665140149e-08, "epoch": 9.903347477604903, "percentage": 99.03, "elapsed_time": "1:31:17", "remaining_time": "0:00:53", "throughput": 3857.96, "total_tokens": 21132352}
|
| 4221 |
+
{"current_steps": 21010, "total_steps": 21210, "loss": 0.3496, "lr": 1.3677159573163844e-08, "epoch": 9.905704856199906, "percentage": 99.06, "elapsed_time": "1:31:19", "remaining_time": "0:00:52", "throughput": 3858.12, "total_tokens": 21139040}
|
| 4222 |
+
{"current_steps": 21015, "total_steps": 21210, "loss": 0.3129, "lr": 1.3005225521325881e-08, "epoch": 9.908062234794908, "percentage": 99.08, "elapsed_time": "1:31:20", "remaining_time": "0:00:50", "throughput": 3858.18, "total_tokens": 21143840}
|
| 4223 |
+
{"current_steps": 21020, "total_steps": 21210, "loss": 0.2627, "lr": 1.2350210964612308e-08, "epoch": 9.91041961338991, "percentage": 99.1, "elapsed_time": "1:31:21", "remaining_time": "0:00:49", "throughput": 3858.31, "total_tokens": 21149664}
|
| 4224 |
+
{"current_steps": 21025, "total_steps": 21210, "loss": 0.31, "lr": 1.1712116346557222e-08, "epoch": 9.912776991984913, "percentage": 99.13, "elapsed_time": "1:31:23", "remaining_time": "0:00:48", "throughput": 3858.46, "total_tokens": 21156064}
|
| 4225 |
+
{"current_steps": 21030, "total_steps": 21210, "loss": 0.3133, "lr": 1.1090942099228895e-08, "epoch": 9.915134370579915, "percentage": 99.15, "elapsed_time": "1:31:24", "remaining_time": "0:00:46", "throughput": 3858.54, "total_tokens": 21160928}
|
| 4226 |
+
{"current_steps": 21035, "total_steps": 21210, "loss": 0.3323, "lr": 1.0486688643251974e-08, "epoch": 9.917491749174918, "percentage": 99.17, "elapsed_time": "1:31:25", "remaining_time": "0:00:45", "throughput": 3858.6, "total_tokens": 21165248}
|
| 4227 |
+
{"current_steps": 21040, "total_steps": 21210, "loss": 0.3219, "lr": 9.899356387779724e-09, "epoch": 9.91984912776992, "percentage": 99.2, "elapsed_time": "1:31:26", "remaining_time": "0:00:44", "throughput": 3858.7, "total_tokens": 21170112}
|
| 4228 |
+
{"current_steps": 21045, "total_steps": 21210, "loss": 0.3102, "lr": 9.328945730519012e-09, "epoch": 9.922206506364923, "percentage": 99.22, "elapsed_time": "1:31:27", "remaining_time": "0:00:43", "throughput": 3858.8, "total_tokens": 21175584}
|
| 4229 |
+
{"current_steps": 21050, "total_steps": 21210, "loss": 0.3287, "lr": 8.775457057708102e-09, "epoch": 9.924563884959925, "percentage": 99.25, "elapsed_time": "1:31:28", "remaining_time": "0:00:41", "throughput": 3858.86, "total_tokens": 21179744}
|
| 4230 |
+
{"current_steps": 21055, "total_steps": 21210, "loss": 0.3487, "lr": 8.238890744136084e-09, "epoch": 9.926921263554927, "percentage": 99.27, "elapsed_time": "1:31:29", "remaining_time": "0:00:40", "throughput": 3858.94, "total_tokens": 21184704}
|
| 4231 |
+
{"current_steps": 21060, "total_steps": 21210, "loss": 0.3394, "lr": 7.71924715312622e-09, "epoch": 9.92927864214993, "percentage": 99.29, "elapsed_time": "1:31:30", "remaining_time": "0:00:39", "throughput": 3859.03, "total_tokens": 21189664}
|
| 4232 |
+
{"current_steps": 21065, "total_steps": 21210, "loss": 0.3094, "lr": 7.216526636547039e-09, "epoch": 9.931636020744932, "percentage": 99.32, "elapsed_time": "1:31:32", "remaining_time": "0:00:37", "throughput": 3859.1, "total_tokens": 21194240}
|
| 4233 |
+
{"current_steps": 21070, "total_steps": 21210, "loss": 0.3551, "lr": 6.730729534804025e-09, "epoch": 9.933993399339935, "percentage": 99.34, "elapsed_time": "1:31:33", "remaining_time": "0:00:36", "throughput": 3859.21, "total_tokens": 21199552}
|
| 4234 |
+
{"current_steps": 21075, "total_steps": 21210, "loss": 0.3329, "lr": 6.261856176850711e-09, "epoch": 9.936350777934937, "percentage": 99.36, "elapsed_time": "1:31:34", "remaining_time": "0:00:35", "throughput": 3859.25, "total_tokens": 21203232}
|
| 4235 |
+
{"current_steps": 21080, "total_steps": 21210, "loss": 0.3065, "lr": 5.809906880174798e-09, "epoch": 9.93870815652994, "percentage": 99.39, "elapsed_time": "1:31:35", "remaining_time": "0:00:33", "throughput": 3859.4, "total_tokens": 21209696}
|
| 4236 |
+
{"current_steps": 21085, "total_steps": 21210, "loss": 0.3348, "lr": 5.374881950803712e-09, "epoch": 9.941065535124942, "percentage": 99.41, "elapsed_time": "1:31:36", "remaining_time": "0:00:32", "throughput": 3859.48, "total_tokens": 21214688}
|
| 4237 |
+
{"current_steps": 21090, "total_steps": 21210, "loss": 0.3285, "lr": 4.956781683310152e-09, "epoch": 9.943422913719944, "percentage": 99.43, "elapsed_time": "1:31:38", "remaining_time": "0:00:31", "throughput": 3859.64, "total_tokens": 21221184}
|
| 4238 |
+
{"current_steps": 21095, "total_steps": 21210, "loss": 0.3072, "lr": 4.555606360798215e-09, "epoch": 9.945780292314947, "percentage": 99.46, "elapsed_time": "1:31:39", "remaining_time": "0:00:29", "throughput": 3859.67, "total_tokens": 21225408}
|
| 4239 |
+
{"current_steps": 21100, "total_steps": 21210, "loss": 0.3194, "lr": 4.171356254920045e-09, "epoch": 9.948137670909949, "percentage": 99.48, "elapsed_time": "1:31:40", "remaining_time": "0:00:28", "throughput": 3859.76, "total_tokens": 21230400}
|
| 4240 |
+
{"current_steps": 21105, "total_steps": 21210, "loss": 0.3172, "lr": 3.804031625864735e-09, "epoch": 9.950495049504951, "percentage": 99.5, "elapsed_time": "1:31:41", "remaining_time": "0:00:27", "throughput": 3859.81, "total_tokens": 21234752}
|
| 4241 |
+
{"current_steps": 21110, "total_steps": 21210, "loss": 0.3453, "lr": 3.453632722358324e-09, "epoch": 9.952852428099952, "percentage": 99.53, "elapsed_time": "1:31:42", "remaining_time": "0:00:26", "throughput": 3859.92, "total_tokens": 21240192}
|
| 4242 |
+
{"current_steps": 21115, "total_steps": 21210, "loss": 0.3439, "lr": 3.1201597816638006e-09, "epoch": 9.955209806694956, "percentage": 99.55, "elapsed_time": "1:31:43", "remaining_time": "0:00:24", "throughput": 3859.97, "total_tokens": 21244608}
|
| 4243 |
+
{"current_steps": 21120, "total_steps": 21210, "loss": 0.3861, "lr": 2.8036130295922004e-09, "epoch": 9.957567185289957, "percentage": 99.58, "elapsed_time": "1:31:45", "remaining_time": "0:00:23", "throughput": 3860.11, "total_tokens": 21251680}
|
| 4244 |
+
{"current_steps": 21125, "total_steps": 21210, "loss": 0.3405, "lr": 2.5039926804831803e-09, "epoch": 9.95992456388496, "percentage": 99.6, "elapsed_time": "1:31:46", "remaining_time": "0:00:22", "throughput": 3860.22, "total_tokens": 21257600}
|
| 4245 |
+
{"current_steps": 21130, "total_steps": 21210, "loss": 0.3087, "lr": 2.2212989372188964e-09, "epoch": 9.962281942479962, "percentage": 99.62, "elapsed_time": "1:31:47", "remaining_time": "0:00:20", "throughput": 3860.29, "total_tokens": 21262336}
|
| 4246 |
+
{"current_steps": 21135, "total_steps": 21210, "loss": 0.306, "lr": 1.955531991224002e-09, "epoch": 9.964639321074964, "percentage": 99.65, "elapsed_time": "1:31:49", "remaining_time": "0:00:19", "throughput": 3860.36, "total_tokens": 21267136}
|
| 4247 |
+
{"current_steps": 21140, "total_steps": 21210, "loss": 0.2891, "lr": 1.7066920224573236e-09, "epoch": 9.966996699669966, "percentage": 99.67, "elapsed_time": "1:31:50", "remaining_time": "0:00:18", "throughput": 3860.42, "total_tokens": 21271872}
|
| 4248 |
+
{"current_steps": 21145, "total_steps": 21210, "loss": 0.298, "lr": 1.4747791994118575e-09, "epoch": 9.969354078264969, "percentage": 99.69, "elapsed_time": "1:31:51", "remaining_time": "0:00:16", "throughput": 3860.52, "total_tokens": 21277056}
|
| 4249 |
+
{"current_steps": 21150, "total_steps": 21210, "loss": 0.337, "lr": 1.2597936791286514e-09, "epoch": 9.971711456859971, "percentage": 99.72, "elapsed_time": "1:31:52", "remaining_time": "0:00:15", "throughput": 3860.6, "total_tokens": 21281888}
|
| 4250 |
+
{"current_steps": 21155, "total_steps": 21210, "loss": 0.2993, "lr": 1.061735607177372e-09, "epoch": 9.974068835454974, "percentage": 99.74, "elapsed_time": "1:31:54", "remaining_time": "0:00:14", "throughput": 3860.72, "total_tokens": 21288128}
|
| 4251 |
+
{"current_steps": 21160, "total_steps": 21210, "loss": 0.3316, "lr": 8.806051176729612e-10, "epoch": 9.976426214049976, "percentage": 99.76, "elapsed_time": "1:31:55", "remaining_time": "0:00:13", "throughput": 3860.81, "total_tokens": 21293280}
|
| 4252 |
+
{"current_steps": 21165, "total_steps": 21210, "loss": 0.2848, "lr": 7.164023332617564e-10, "epoch": 9.978783592644978, "percentage": 99.79, "elapsed_time": "1:31:56", "remaining_time": "0:00:11", "throughput": 3860.92, "total_tokens": 21298880}
|
| 4253 |
+
{"current_steps": 21170, "total_steps": 21210, "loss": 0.2927, "lr": 5.691273651325935e-10, "epoch": 9.98114097123998, "percentage": 99.81, "elapsed_time": "1:31:57", "remaining_time": "0:00:10", "throughput": 3861.05, "total_tokens": 21304480}
|
| 4254 |
+
{"current_steps": 21175, "total_steps": 21210, "loss": 0.3214, "lr": 4.3878031300847997e-10, "epoch": 9.983498349834983, "percentage": 99.83, "elapsed_time": "1:31:58", "remaining_time": "0:00:09", "throughput": 3861.11, "total_tokens": 21309120}
|
| 4255 |
+
{"current_steps": 21180, "total_steps": 21210, "loss": 0.3304, "lr": 3.253612651521465e-10, "epoch": 9.985855728429986, "percentage": 99.86, "elapsed_time": "1:32:00", "remaining_time": "0:00:07", "throughput": 3861.19, "total_tokens": 21314048}
|
| 4256 |
+
{"current_steps": 21185, "total_steps": 21210, "loss": 0.3329, "lr": 2.2887029836327067e-10, "epoch": 9.988213107024988, "percentage": 99.88, "elapsed_time": "1:32:01", "remaining_time": "0:00:06", "throughput": 3861.31, "total_tokens": 21319552}
|
| 4257 |
+
{"current_steps": 21190, "total_steps": 21210, "loss": 0.3012, "lr": 1.4930747798125312e-10, "epoch": 9.99057048561999, "percentage": 99.91, "elapsed_time": "1:32:02", "remaining_time": "0:00:05", "throughput": 3861.36, "total_tokens": 21323808}
|
| 4258 |
+
{"current_steps": 21195, "total_steps": 21210, "loss": 0.3043, "lr": 8.66728578768905e-11, "epoch": 9.992927864214993, "percentage": 99.93, "elapsed_time": "1:32:03", "remaining_time": "0:00:03", "throughput": 3861.47, "total_tokens": 21329056}
|
| 4259 |
+
{"current_steps": 21200, "total_steps": 21210, "loss": 0.3316, "lr": 4.096648046347795e-11, "epoch": 9.995285242809995, "percentage": 99.95, "elapsed_time": "1:32:04", "remaining_time": "0:00:02", "throughput": 3861.51, "total_tokens": 21333280}
|
| 4260 |
+
{"current_steps": 21205, "total_steps": 21210, "loss": 0.3499, "lr": 1.218837669125783e-11, "epoch": 9.997642621404998, "percentage": 99.98, "elapsed_time": "1:32:05", "remaining_time": "0:00:01", "throughput": 3861.56, "total_tokens": 21337632}
|
| 4261 |
+
{"current_steps": 21210, "total_steps": 21210, "loss": 0.3516, "lr": 3.3856604464421736e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:32:06", "remaining_time": "0:00:00", "throughput": 3861.58, "total_tokens": 21342336}
|
| 4262 |
+
{"current_steps": 21210, "total_steps": 21210, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:32:08", "remaining_time": "0:00:00", "throughput": 3860.45, "total_tokens": 21342336}
|