sedrickkeh commited on
Commit
a202694
·
verified ·
1 Parent(s): ae5f818

Training in progress, epoch 2

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:932693bbcde0d4d15045c0db5446e10155f944b86ca4543c3a1ec6ead22fe1b8
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be380a6d643857f9d9d29371f845e75453b819451971e6ee28a0c423ee0f82dc
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b533c8ede529526ffc51d062875a1d64a4c3cdf2febf6cfbaec763fe2bd1e0f
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c2b5c196212a3fd68239004cf7e2955239a135ba194cf798815d3895b16a924
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6350e2536398f18d37afb18ef18ceff9828fb8643d1beed68dd0a46274c5b33
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45165918507851765c2f4fa88af399a50e157cdc9e6a92b34bd929e1fd19c91e
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa9e69252bbc3f9939695d1674a92b8e5620a0733538c2e7720671927d731408
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cf200a7d87cbfa6f8aeaf58076bae6482ceb2b7cf70dd7c917d2d8704f0b544
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -123,3 +123,65 @@
123
  {"current_steps": 123, "total_steps": 186, "loss": 0.3779, "lr": 2.4953740815929112e-05, "epoch": 1.972, "percentage": 66.13, "elapsed_time": "7:28:32", "remaining_time": "3:49:44"}
124
  {"current_steps": 124, "total_steps": 186, "loss": 0.3772, "lr": 2.4259231755340185e-05, "epoch": 1.988, "percentage": 66.67, "elapsed_time": "7:32:13", "remaining_time": "3:46:06"}
125
  {"current_steps": 125, "total_steps": 186, "loss": 0.6469, "lr": 2.3570293011867705e-05, "epoch": 2.008, "percentage": 67.2, "elapsed_time": "7:37:24", "remaining_time": "3:43:12"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  {"current_steps": 123, "total_steps": 186, "loss": 0.3779, "lr": 2.4953740815929112e-05, "epoch": 1.972, "percentage": 66.13, "elapsed_time": "7:28:32", "remaining_time": "3:49:44"}
124
  {"current_steps": 124, "total_steps": 186, "loss": 0.3772, "lr": 2.4259231755340185e-05, "epoch": 1.988, "percentage": 66.67, "elapsed_time": "7:32:13", "remaining_time": "3:46:06"}
125
  {"current_steps": 125, "total_steps": 186, "loss": 0.6469, "lr": 2.3570293011867705e-05, "epoch": 2.008, "percentage": 67.2, "elapsed_time": "7:37:24", "remaining_time": "3:43:12"}
126
+ {"current_steps": 126, "total_steps": 186, "loss": 0.3551, "lr": 2.2887168386018446e-05, "epoch": 2.024, "percentage": 67.74, "elapsed_time": "7:41:01", "remaining_time": "3:39:32"}
127
+ {"current_steps": 127, "total_steps": 186, "loss": 0.3608, "lr": 2.221009962080883e-05, "epoch": 2.04, "percentage": 68.28, "elapsed_time": "7:44:38", "remaining_time": "3:35:51"}
128
+ {"current_steps": 128, "total_steps": 186, "loss": 0.3618, "lr": 2.153932631621725e-05, "epoch": 2.056, "percentage": 68.82, "elapsed_time": "7:48:15", "remaining_time": "3:32:10"}
129
+ {"current_steps": 129, "total_steps": 186, "loss": 0.36, "lr": 2.0875085844394933e-05, "epoch": 2.072, "percentage": 69.35, "elapsed_time": "7:51:53", "remaining_time": "3:28:30"}
130
+ {"current_steps": 130, "total_steps": 186, "loss": 0.366, "lr": 2.021761326566499e-05, "epoch": 2.088, "percentage": 69.89, "elapsed_time": "7:55:30", "remaining_time": "3:24:50"}
131
+ {"current_steps": 131, "total_steps": 186, "loss": 0.3577, "lr": 1.9567141245339695e-05, "epoch": 2.104, "percentage": 70.43, "elapsed_time": "7:59:08", "remaining_time": "3:21:09"}
132
+ {"current_steps": 132, "total_steps": 186, "loss": 0.3628, "lr": 1.8923899971385215e-05, "epoch": 2.12, "percentage": 70.97, "elapsed_time": "8:02:45", "remaining_time": "3:17:29"}
133
+ {"current_steps": 133, "total_steps": 186, "loss": 0.3596, "lr": 1.828811707296315e-05, "epoch": 2.136, "percentage": 71.51, "elapsed_time": "8:06:23", "remaining_time": "3:13:49"}
134
+ {"current_steps": 134, "total_steps": 186, "loss": 0.3576, "lr": 1.766001753987748e-05, "epoch": 2.152, "percentage": 72.04, "elapsed_time": "8:10:00", "remaining_time": "3:10:09"}
135
+ {"current_steps": 135, "total_steps": 186, "loss": 0.3625, "lr": 1.7039823642955458e-05, "epoch": 2.168, "percentage": 72.58, "elapsed_time": "8:13:38", "remaining_time": "3:06:29"}
136
+ {"current_steps": 136, "total_steps": 186, "loss": 0.3579, "lr": 1.642775485539092e-05, "epoch": 2.184, "percentage": 73.12, "elapsed_time": "8:17:15", "remaining_time": "3:02:49"}
137
+ {"current_steps": 137, "total_steps": 186, "loss": 0.3584, "lr": 1.5824027775077322e-05, "epoch": 2.2, "percentage": 73.66, "elapsed_time": "8:20:53", "remaining_time": "2:59:09"}
138
+ {"current_steps": 138, "total_steps": 186, "loss": 0.359, "lr": 1.5228856047958603e-05, "epoch": 2.216, "percentage": 74.19, "elapsed_time": "8:24:31", "remaining_time": "2:55:29"}
139
+ {"current_steps": 139, "total_steps": 186, "loss": 0.3659, "lr": 1.4642450292424326e-05, "epoch": 2.232, "percentage": 74.73, "elapsed_time": "8:28:08", "remaining_time": "2:51:49"}
140
+ {"current_steps": 140, "total_steps": 186, "loss": 0.3572, "lr": 1.4065018024776533e-05, "epoch": 2.248, "percentage": 75.27, "elapsed_time": "8:31:45", "remaining_time": "2:48:09"}
141
+ {"current_steps": 141, "total_steps": 186, "loss": 0.365, "lr": 1.3496763585794166e-05, "epoch": 2.2640000000000002, "percentage": 75.81, "elapsed_time": "8:35:23", "remaining_time": "2:44:29"}
142
+ {"current_steps": 142, "total_steps": 186, "loss": 0.3513, "lr": 1.2937888068421254e-05, "epoch": 2.2800000000000002, "percentage": 76.34, "elapsed_time": "8:39:01", "remaining_time": "2:40:49"}
143
+ {"current_steps": 143, "total_steps": 186, "loss": 0.3515, "lr": 1.238858924660456e-05, "epoch": 2.296, "percentage": 76.88, "elapsed_time": "8:42:38", "remaining_time": "2:37:09"}
144
+ {"current_steps": 144, "total_steps": 186, "loss": 0.3576, "lr": 1.184906150530555e-05, "epoch": 2.312, "percentage": 77.42, "elapsed_time": "8:46:16", "remaining_time": "2:33:29"}
145
+ {"current_steps": 145, "total_steps": 186, "loss": 0.3611, "lr": 1.1319495771711884e-05, "epoch": 2.328, "percentage": 77.96, "elapsed_time": "8:49:53", "remaining_time": "2:29:49"}
146
+ {"current_steps": 146, "total_steps": 186, "loss": 0.3584, "lr": 1.0800079447672318e-05, "epoch": 2.344, "percentage": 78.49, "elapsed_time": "8:53:31", "remaining_time": "2:26:10"}
147
+ {"current_steps": 147, "total_steps": 186, "loss": 0.3596, "lr": 1.0290996343379316e-05, "epoch": 2.36, "percentage": 79.03, "elapsed_time": "8:57:08", "remaining_time": "2:22:30"}
148
+ {"current_steps": 148, "total_steps": 186, "loss": 0.3631, "lr": 9.79242661232256e-06, "epoch": 2.376, "percentage": 79.57, "elapsed_time": "9:00:45", "remaining_time": "2:18:50"}
149
+ {"current_steps": 149, "total_steps": 186, "loss": 0.3597, "lr": 9.304546687536523e-06, "epoch": 2.392, "percentage": 80.11, "elapsed_time": "9:04:23", "remaining_time": "2:15:10"}
150
+ {"current_steps": 150, "total_steps": 186, "loss": 0.3593, "lr": 8.827529219164704e-06, "epoch": 2.408, "percentage": 80.65, "elapsed_time": "9:08:00", "remaining_time": "2:11:31"}
151
+ {"current_steps": 151, "total_steps": 186, "loss": 0.3578, "lr": 8.361543013362384e-06, "epoch": 2.424, "percentage": 81.18, "elapsed_time": "9:11:38", "remaining_time": "2:07:51"}
152
+ {"current_steps": 152, "total_steps": 186, "loss": 0.3564, "lr": 7.90675297255986e-06, "epoch": 2.44, "percentage": 81.72, "elapsed_time": "9:15:15", "remaining_time": "2:04:12"}
153
+ {"current_steps": 153, "total_steps": 186, "loss": 0.3555, "lr": 7.463320037107018e-06, "epoch": 2.456, "percentage": 82.26, "elapsed_time": "9:18:51", "remaining_time": "2:00:32"}
154
+ {"current_steps": 154, "total_steps": 186, "loss": 0.3618, "lr": 7.031401128320019e-06, "epoch": 2.472, "percentage": 82.8, "elapsed_time": "9:22:29", "remaining_time": "1:56:52"}
155
+ {"current_steps": 155, "total_steps": 186, "loss": 0.3623, "lr": 6.61114909295026e-06, "epoch": 2.488, "percentage": 83.33, "elapsed_time": "9:26:07", "remaining_time": "1:53:13"}
156
+ {"current_steps": 156, "total_steps": 186, "loss": 0.3576, "lr": 6.202712649095177e-06, "epoch": 2.504, "percentage": 83.87, "elapsed_time": "9:29:44", "remaining_time": "1:49:33"}
157
+ {"current_steps": 157, "total_steps": 186, "loss": 0.3618, "lr": 5.806236333570203e-06, "epoch": 2.52, "percentage": 84.41, "elapsed_time": "9:33:21", "remaining_time": "1:45:54"}
158
+ {"current_steps": 158, "total_steps": 186, "loss": 0.3573, "lr": 5.421860450760226e-06, "epoch": 2.536, "percentage": 84.95, "elapsed_time": "9:36:58", "remaining_time": "1:42:14"}
159
+ {"current_steps": 159, "total_steps": 186, "loss": 0.3555, "lr": 5.04972102296899e-06, "epoch": 2.552, "percentage": 85.48, "elapsed_time": "9:40:35", "remaining_time": "1:38:35"}
160
+ {"current_steps": 160, "total_steps": 186, "loss": 0.3596, "lr": 4.6899497422837035e-06, "epoch": 2.568, "percentage": 86.02, "elapsed_time": "9:44:13", "remaining_time": "1:34:56"}
161
+ {"current_steps": 161, "total_steps": 186, "loss": 0.3558, "lr": 4.34267392397206e-06, "epoch": 2.584, "percentage": 86.56, "elapsed_time": "9:47:50", "remaining_time": "1:31:16"}
162
+ {"current_steps": 162, "total_steps": 186, "loss": 0.3534, "lr": 4.0080164614281395e-06, "epoch": 2.6, "percentage": 87.1, "elapsed_time": "9:51:28", "remaining_time": "1:27:37"}
163
+ {"current_steps": 163, "total_steps": 186, "loss": 0.3596, "lr": 3.6860957826830545e-06, "epoch": 2.616, "percentage": 87.63, "elapsed_time": "9:55:05", "remaining_time": "1:23:58"}
164
+ {"current_steps": 164, "total_steps": 186, "loss": 0.3552, "lr": 3.377025808495913e-06, "epoch": 2.632, "percentage": 88.17, "elapsed_time": "9:58:44", "remaining_time": "1:20:19"}
165
+ {"current_steps": 165, "total_steps": 186, "loss": 0.3581, "lr": 3.080915912039628e-06, "epoch": 2.648, "percentage": 88.71, "elapsed_time": "10:02:21", "remaining_time": "1:16:39"}
166
+ {"current_steps": 166, "total_steps": 186, "loss": 0.3547, "lr": 2.797870880196203e-06, "epoch": 2.664, "percentage": 89.25, "elapsed_time": "10:05:59", "remaining_time": "1:13:00"}
167
+ {"current_steps": 167, "total_steps": 186, "loss": 0.3601, "lr": 2.527990876474893e-06, "epoch": 2.68, "percentage": 89.78, "elapsed_time": "10:09:36", "remaining_time": "1:09:21"}
168
+ {"current_steps": 168, "total_steps": 186, "loss": 0.3545, "lr": 2.271371405566485e-06, "epoch": 2.6959999999999997, "percentage": 90.32, "elapsed_time": "10:13:14", "remaining_time": "1:05:42"}
169
+ {"current_steps": 169, "total_steps": 186, "loss": 0.3553, "lr": 2.0281032795462963e-06, "epoch": 2.7119999999999997, "percentage": 90.86, "elapsed_time": "10:16:51", "remaining_time": "1:02:03"}
170
+ {"current_steps": 170, "total_steps": 186, "loss": 0.3575, "lr": 1.7982725857377036e-06, "epoch": 2.7279999999999998, "percentage": 91.4, "elapsed_time": "10:20:29", "remaining_time": "0:58:23"}
171
+ {"current_steps": 171, "total_steps": 186, "loss": 0.3552, "lr": 1.5819606562477475e-06, "epoch": 2.7439999999999998, "percentage": 91.94, "elapsed_time": "10:24:06", "remaining_time": "0:54:44"}
172
+ {"current_steps": 172, "total_steps": 186, "loss": 0.3559, "lr": 1.3792440391854122e-06, "epoch": 2.76, "percentage": 92.47, "elapsed_time": "10:27:44", "remaining_time": "0:51:05"}
173
+ {"current_steps": 173, "total_steps": 186, "loss": 0.3593, "lr": 1.190194471572963e-06, "epoch": 2.776, "percentage": 93.01, "elapsed_time": "10:31:21", "remaining_time": "0:47:26"}
174
+ {"current_steps": 174, "total_steps": 186, "loss": 0.3621, "lr": 1.0148788539597176e-06, "epoch": 2.792, "percentage": 93.55, "elapsed_time": "10:34:58", "remaining_time": "0:43:47"}
175
+ {"current_steps": 175, "total_steps": 186, "loss": 0.3589, "lr": 8.533592267474166e-07, "epoch": 2.808, "percentage": 94.09, "elapsed_time": "10:38:36", "remaining_time": "0:40:08"}
176
+ {"current_steps": 176, "total_steps": 186, "loss": 0.3563, "lr": 7.056927482354514e-07, "epoch": 2.824, "percentage": 94.62, "elapsed_time": "10:42:13", "remaining_time": "0:36:29"}
177
+ {"current_steps": 177, "total_steps": 186, "loss": 0.3547, "lr": 5.719316743938086e-07, "epoch": 2.84, "percentage": 95.16, "elapsed_time": "10:45:51", "remaining_time": "0:32:50"}
178
+ {"current_steps": 178, "total_steps": 186, "loss": 0.3514, "lr": 4.521233403707781e-07, "epoch": 2.856, "percentage": 95.7, "elapsed_time": "10:49:28", "remaining_time": "0:29:11"}
179
+ {"current_steps": 179, "total_steps": 186, "loss": 0.3576, "lr": 3.463101437420857e-07, "epoch": 2.872, "percentage": 96.24, "elapsed_time": "10:53:05", "remaining_time": "0:25:32"}
180
+ {"current_steps": 180, "total_steps": 186, "loss": 0.3496, "lr": 2.5452952950732136e-07, "epoch": 2.888, "percentage": 96.77, "elapsed_time": "10:56:43", "remaining_time": "0:21:53"}
181
+ {"current_steps": 181, "total_steps": 186, "loss": 0.3584, "lr": 1.768139768389343e-07, "epoch": 2.904, "percentage": 97.31, "elapsed_time": "11:00:20", "remaining_time": "0:18:14"}
182
+ {"current_steps": 182, "total_steps": 186, "loss": 0.3559, "lr": 1.1319098758858726e-07, "epoch": 2.92, "percentage": 97.85, "elapsed_time": "11:03:58", "remaining_time": "0:14:35"}
183
+ {"current_steps": 183, "total_steps": 186, "loss": 0.3566, "lr": 6.368307655482576e-08, "epoch": 2.936, "percentage": 98.39, "elapsed_time": "11:07:36", "remaining_time": "0:10:56"}
184
+ {"current_steps": 184, "total_steps": 186, "loss": 0.3628, "lr": 2.830776351558484e-08, "epoch": 2.952, "percentage": 98.92, "elapsed_time": "11:11:13", "remaining_time": "0:07:17"}
185
+ {"current_steps": 185, "total_steps": 186, "loss": 0.3557, "lr": 7.077567028335175e-09, "epoch": 2.968, "percentage": 99.46, "elapsed_time": "11:14:51", "remaining_time": "0:03:38"}
186
+ {"current_steps": 186, "total_steps": 186, "loss": 0.3594, "lr": 0.0, "epoch": 2.984, "percentage": 100.0, "elapsed_time": "11:18:28", "remaining_time": "0:00:00"}
187
+ {"current_steps": 186, "total_steps": 186, "epoch": 2.984, "percentage": 100.0, "elapsed_time": "11:20:02", "remaining_time": "0:00:00"}