rbelanec commited on
Commit
011fba1
verified
1 Parent(s): 9f18cba

Training in progress, step 5610

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +53 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6e21ac65fa6a103b7233e217fcc83df4ce16a539b492dbe4f1f241404479698
3
  size 26214528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fa14fbbc5d4b30ee2cec3fc875d812e83a32983a2babe99b560e11869192d41
3
  size 26214528
trainer_log.jsonl CHANGED
@@ -1087,3 +1087,56 @@
1087
  {"current_steps": 5340, "total_steps": 5610, "loss": 0.1771, "lr": 3.5457516904947587e-07, "epoch": 9.518716577540108, "percentage": 95.19, "elapsed_time": "0:15:27", "remaining_time": "0:00:46", "throughput": 3571.61, "total_tokens": 3313672}
1088
  {"current_steps": 5345, "total_steps": 5610, "loss": 0.1688, "lr": 3.416415229512443e-07, "epoch": 9.527629233511586, "percentage": 95.28, "elapsed_time": "0:15:28", "remaining_time": "0:00:46", "throughput": 3572.23, "total_tokens": 3317224}
1089
  {"current_steps": 5350, "total_steps": 5610, "loss": 0.1661, "lr": 3.2894654410041417e-07, "epoch": 9.536541889483066, "percentage": 95.37, "elapsed_time": "0:15:29", "remaining_time": "0:00:45", "throughput": 3572.45, "total_tokens": 3319848}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1087
  {"current_steps": 5340, "total_steps": 5610, "loss": 0.1771, "lr": 3.5457516904947587e-07, "epoch": 9.518716577540108, "percentage": 95.19, "elapsed_time": "0:15:27", "remaining_time": "0:00:46", "throughput": 3571.61, "total_tokens": 3313672}
1088
  {"current_steps": 5345, "total_steps": 5610, "loss": 0.1688, "lr": 3.416415229512443e-07, "epoch": 9.527629233511586, "percentage": 95.28, "elapsed_time": "0:15:28", "remaining_time": "0:00:46", "throughput": 3572.23, "total_tokens": 3317224}
1089
  {"current_steps": 5350, "total_steps": 5610, "loss": 0.1661, "lr": 3.2894654410041417e-07, "epoch": 9.536541889483066, "percentage": 95.37, "elapsed_time": "0:15:29", "remaining_time": "0:00:45", "throughput": 3572.45, "total_tokens": 3319848}
1090
+ {"current_steps": 5355, "total_steps": 5610, "loss": 0.1521, "lr": 3.1649035537117123e-07, "epoch": 9.545454545454545, "percentage": 95.45, "elapsed_time": "0:15:29", "remaining_time": "0:00:44", "throughput": 3572.87, "total_tokens": 3322664}
1091
+ {"current_steps": 5360, "total_steps": 5610, "loss": 0.1512, "lr": 3.042730773264557e-07, "epoch": 9.554367201426025, "percentage": 95.54, "elapsed_time": "0:15:30", "remaining_time": "0:00:43", "throughput": 3573.39, "total_tokens": 3325928}
1092
+ {"current_steps": 5365, "total_steps": 5610, "loss": 0.1496, "lr": 2.9229482821680197e-07, "epoch": 9.563279857397504, "percentage": 95.63, "elapsed_time": "0:15:31", "remaining_time": "0:00:42", "throughput": 3573.82, "total_tokens": 3328680}
1093
+ {"current_steps": 5370, "total_steps": 5610, "loss": 0.152, "lr": 2.8055572397919784e-07, "epoch": 9.572192513368984, "percentage": 95.72, "elapsed_time": "0:15:32", "remaining_time": "0:00:41", "throughput": 3574.45, "total_tokens": 3331976}
1094
+ {"current_steps": 5375, "total_steps": 5610, "loss": 0.1609, "lr": 2.690558782359576e-07, "epoch": 9.581105169340464, "percentage": 95.81, "elapsed_time": "0:15:32", "remaining_time": "0:00:40", "throughput": 3574.82, "total_tokens": 3334888}
1095
+ {"current_steps": 5380, "total_steps": 5610, "loss": 0.1822, "lr": 2.5779540229361745e-07, "epoch": 9.590017825311943, "percentage": 95.9, "elapsed_time": "0:15:33", "remaining_time": "0:00:39", "throughput": 3575.3, "total_tokens": 3337960}
1096
+ {"current_steps": 5385, "total_steps": 5610, "loss": 0.155, "lr": 2.467744051418641e-07, "epoch": 9.598930481283423, "percentage": 95.99, "elapsed_time": "0:15:34", "remaining_time": "0:00:39", "throughput": 3575.69, "total_tokens": 3340936}
1097
+ {"current_steps": 5390, "total_steps": 5610, "loss": 0.1561, "lr": 2.3599299345248292e-07, "epoch": 9.607843137254902, "percentage": 96.08, "elapsed_time": "0:15:35", "remaining_time": "0:00:38", "throughput": 3576.12, "total_tokens": 3343784}
1098
+ {"current_steps": 5395, "total_steps": 5610, "loss": 0.1669, "lr": 2.2545127157831413e-07, "epoch": 9.616755793226382, "percentage": 96.17, "elapsed_time": "0:15:35", "remaining_time": "0:00:37", "throughput": 3576.65, "total_tokens": 3347016}
1099
+ {"current_steps": 5400, "total_steps": 5610, "loss": 0.1412, "lr": 2.1514934155226208e-07, "epoch": 9.62566844919786, "percentage": 96.26, "elapsed_time": "0:15:36", "remaining_time": "0:00:36", "throughput": 3577.03, "total_tokens": 3349800}
1100
+ {"current_steps": 5405, "total_steps": 5610, "loss": 0.1527, "lr": 2.0508730308627933e-07, "epoch": 9.63458110516934, "percentage": 96.35, "elapsed_time": "0:15:37", "remaining_time": "0:00:35", "throughput": 3577.64, "total_tokens": 3353640}
1101
+ {"current_steps": 5410, "total_steps": 5610, "loss": 0.1708, "lr": 1.9526525357043136e-07, "epoch": 9.643493761140821, "percentage": 96.43, "elapsed_time": "0:15:38", "remaining_time": "0:00:34", "throughput": 3578.18, "total_tokens": 3356904}
1102
+ {"current_steps": 5415, "total_steps": 5610, "loss": 0.1623, "lr": 1.8568328807193337e-07, "epoch": 9.6524064171123, "percentage": 96.52, "elapsed_time": "0:15:38", "remaining_time": "0:00:33", "throughput": 3578.83, "total_tokens": 3360232}
1103
+ {"current_steps": 5420, "total_steps": 5610, "loss": 0.1723, "lr": 1.7634149933423993e-07, "epoch": 9.66131907308378, "percentage": 96.61, "elapsed_time": "0:15:39", "remaining_time": "0:00:32", "throughput": 3579.19, "total_tokens": 3362824}
1104
+ {"current_steps": 5425, "total_steps": 5610, "loss": 0.2013, "lr": 1.6723997777614574e-07, "epoch": 9.670231729055258, "percentage": 96.7, "elapsed_time": "0:15:40", "remaining_time": "0:00:32", "throughput": 3579.69, "total_tokens": 3366152}
1105
+ {"current_steps": 5430, "total_steps": 5610, "loss": 0.1668, "lr": 1.5837881149090294e-07, "epoch": 9.679144385026738, "percentage": 96.79, "elapsed_time": "0:15:41", "remaining_time": "0:00:31", "throughput": 3580.14, "total_tokens": 3369192}
1106
+ {"current_steps": 5435, "total_steps": 5610, "loss": 0.1767, "lr": 1.497580862453829e-07, "epoch": 9.688057040998217, "percentage": 96.88, "elapsed_time": "0:15:41", "remaining_time": "0:00:30", "throughput": 3580.87, "total_tokens": 3372776}
1107
+ {"current_steps": 5440, "total_steps": 5610, "loss": 0.1829, "lr": 1.4137788547923246e-07, "epoch": 9.696969696969697, "percentage": 96.97, "elapsed_time": "0:15:42", "remaining_time": "0:00:29", "throughput": 3581.53, "total_tokens": 3376232}
1108
+ {"current_steps": 5445, "total_steps": 5610, "loss": 0.1916, "lr": 1.3323829030407465e-07, "epoch": 9.705882352941176, "percentage": 97.06, "elapsed_time": "0:15:43", "remaining_time": "0:00:28", "throughput": 3582.2, "total_tokens": 3379912}
1109
+ {"current_steps": 5450, "total_steps": 5610, "loss": 0.1639, "lr": 1.2533937950272023e-07, "epoch": 9.714795008912656, "percentage": 97.15, "elapsed_time": "0:15:44", "remaining_time": "0:00:27", "throughput": 3582.69, "total_tokens": 3382824}
1110
+ {"current_steps": 5455, "total_steps": 5610, "loss": 0.1577, "lr": 1.176812295283991e-07, "epoch": 9.723707664884136, "percentage": 97.24, "elapsed_time": "0:15:44", "remaining_time": "0:00:26", "throughput": 3583.12, "total_tokens": 3385640}
1111
+ {"current_steps": 5460, "total_steps": 5610, "loss": 0.1652, "lr": 1.1026391450404128e-07, "epoch": 9.732620320855615, "percentage": 97.33, "elapsed_time": "0:15:45", "remaining_time": "0:00:25", "throughput": 3583.97, "total_tokens": 3389672}
1112
+ {"current_steps": 5465, "total_steps": 5610, "loss": 0.1815, "lr": 1.0308750622153307e-07, "epoch": 9.741532976827095, "percentage": 97.42, "elapsed_time": "0:15:46", "remaining_time": "0:00:25", "throughput": 3584.54, "total_tokens": 3393096}
1113
+ {"current_steps": 5470, "total_steps": 5610, "loss": 0.149, "lr": 9.615207414103434e-08, "epoch": 9.750445632798574, "percentage": 97.5, "elapsed_time": "0:15:47", "remaining_time": "0:00:24", "throughput": 3584.93, "total_tokens": 3396136}
1114
+ {"current_steps": 5475, "total_steps": 5610, "loss": 0.1785, "lr": 8.945768539031785e-08, "epoch": 9.759358288770054, "percentage": 97.59, "elapsed_time": "0:15:48", "remaining_time": "0:00:23", "throughput": 3585.37, "total_tokens": 3399304}
1115
+ {"current_steps": 5480, "total_steps": 5610, "loss": 0.1617, "lr": 8.30044047640921e-08, "epoch": 9.768270944741532, "percentage": 97.68, "elapsed_time": "0:15:48", "remaining_time": "0:00:22", "throughput": 3585.83, "total_tokens": 3402216}
1116
+ {"current_steps": 5485, "total_steps": 5610, "loss": 0.1554, "lr": 7.679229472340176e-08, "epoch": 9.777183600713013, "percentage": 97.77, "elapsed_time": "0:15:49", "remaining_time": "0:00:21", "throughput": 3586.33, "total_tokens": 3405096}
1117
+ {"current_steps": 5490, "total_steps": 5610, "loss": 0.1639, "lr": 7.082141539500597e-08, "epoch": 9.786096256684491, "percentage": 97.86, "elapsed_time": "0:15:50", "remaining_time": "0:00:20", "throughput": 3586.72, "total_tokens": 3407912}
1118
+ {"current_steps": 5495, "total_steps": 5610, "loss": 0.1679, "lr": 6.509182457080376e-08, "epoch": 9.795008912655971, "percentage": 97.95, "elapsed_time": "0:15:50", "remaining_time": "0:00:19", "throughput": 3587.13, "total_tokens": 3410856}
1119
+ {"current_steps": 5500, "total_steps": 5610, "loss": 0.1559, "lr": 5.9603577707267875e-08, "epoch": 9.803921568627452, "percentage": 98.04, "elapsed_time": "0:15:51", "remaining_time": "0:00:19", "throughput": 3587.55, "total_tokens": 3413928}
1120
+ {"current_steps": 5505, "total_steps": 5610, "loss": 0.1623, "lr": 5.435672792491742e-08, "epoch": 9.81283422459893, "percentage": 98.13, "elapsed_time": "0:15:52", "remaining_time": "0:00:18", "throughput": 3588.18, "total_tokens": 3417416}
1121
+ {"current_steps": 5510, "total_steps": 5610, "loss": 0.1769, "lr": 4.935132600780157e-08, "epoch": 9.82174688057041, "percentage": 98.22, "elapsed_time": "0:15:53", "remaining_time": "0:00:17", "throughput": 3588.49, "total_tokens": 3420136}
1122
+ {"current_steps": 5515, "total_steps": 5610, "loss": 0.1537, "lr": 4.4587420402997235e-08, "epoch": 9.830659536541889, "percentage": 98.31, "elapsed_time": "0:15:53", "remaining_time": "0:00:16", "throughput": 3588.89, "total_tokens": 3423272}
1123
+ {"current_steps": 5520, "total_steps": 5610, "loss": 0.1499, "lr": 4.006505722015386e-08, "epoch": 9.83957219251337, "percentage": 98.4, "elapsed_time": "0:15:54", "remaining_time": "0:00:15", "throughput": 3589.43, "total_tokens": 3426472}
1124
+ {"current_steps": 5525, "total_steps": 5610, "loss": 0.1725, "lr": 3.578428023103819e-08, "epoch": 9.848484848484848, "percentage": 98.48, "elapsed_time": "0:15:55", "remaining_time": "0:00:14", "throughput": 3590.0, "total_tokens": 3429992}
1125
+ {"current_steps": 5530, "total_steps": 5610, "loss": 0.1554, "lr": 3.1745130869123566e-08, "epoch": 9.857397504456328, "percentage": 98.57, "elapsed_time": "0:15:56", "remaining_time": "0:00:13", "throughput": 3590.25, "total_tokens": 3432456}
1126
+ {"current_steps": 5535, "total_steps": 5610, "loss": 0.1618, "lr": 2.794764822916518e-08, "epoch": 9.866310160427808, "percentage": 98.66, "elapsed_time": "0:15:56", "remaining_time": "0:00:12", "throughput": 3590.36, "total_tokens": 3434888}
1127
+ {"current_steps": 5540, "total_steps": 5610, "loss": 0.1773, "lr": 2.4391869066844874e-08, "epoch": 9.875222816399287, "percentage": 98.75, "elapsed_time": "0:15:57", "remaining_time": "0:00:12", "throughput": 3590.75, "total_tokens": 3437832}
1128
+ {"current_steps": 5545, "total_steps": 5610, "loss": 0.1697, "lr": 2.1077827798404726e-08, "epoch": 9.884135472370767, "percentage": 98.84, "elapsed_time": "0:15:58", "remaining_time": "0:00:11", "throughput": 3591.26, "total_tokens": 3440872}
1129
+ {"current_steps": 5550, "total_steps": 5610, "loss": 0.1495, "lr": 1.8005556500313993e-08, "epoch": 9.893048128342246, "percentage": 98.93, "elapsed_time": "0:15:58", "remaining_time": "0:00:10", "throughput": 3591.61, "total_tokens": 3443784}
1130
+ {"current_steps": 5555, "total_steps": 5610, "loss": 0.1643, "lr": 1.51750849089638e-08, "epoch": 9.901960784313726, "percentage": 99.02, "elapsed_time": "0:15:59", "remaining_time": "0:00:09", "throughput": 3592.29, "total_tokens": 3447592}
1131
+ {"current_steps": 5560, "total_steps": 5610, "loss": 0.1714, "lr": 1.2586440420372936e-08, "epoch": 9.910873440285204, "percentage": 99.11, "elapsed_time": "0:16:00", "remaining_time": "0:00:08", "throughput": 3592.84, "total_tokens": 3451048}
1132
+ {"current_steps": 5565, "total_steps": 5610, "loss": 0.1497, "lr": 1.023964808992417e-08, "epoch": 9.919786096256685, "percentage": 99.2, "elapsed_time": "0:16:01", "remaining_time": "0:00:07", "throughput": 3593.37, "total_tokens": 3453928}
1133
+ {"current_steps": 5570, "total_steps": 5610, "loss": 0.1739, "lr": 8.134730632125554e-09, "epoch": 9.928698752228165, "percentage": 99.29, "elapsed_time": "0:16:01", "remaining_time": "0:00:06", "throughput": 3593.81, "total_tokens": 3456968}
1134
+ {"current_steps": 5575, "total_steps": 5610, "loss": 0.1683, "lr": 6.271708420385603e-09, "epoch": 9.937611408199643, "percentage": 99.38, "elapsed_time": "0:16:02", "remaining_time": "0:00:06", "throughput": 3594.48, "total_tokens": 3460616}
1135
+ {"current_steps": 5580, "total_steps": 5610, "loss": 0.1625, "lr": 4.650599486827334e-09, "epoch": 9.946524064171124, "percentage": 99.47, "elapsed_time": "0:16:03", "remaining_time": "0:00:05", "throughput": 3594.91, "total_tokens": 3463592}
1136
+ {"current_steps": 5585, "total_steps": 5610, "loss": 0.1604, "lr": 3.2714195220912013e-09, "epoch": 9.955436720142602, "percentage": 99.55, "elapsed_time": "0:16:04", "remaining_time": "0:00:04", "throughput": 3595.41, "total_tokens": 3466888}
1137
+ {"current_steps": 5590, "total_steps": 5610, "loss": 0.1602, "lr": 2.134181875204644e-09, "epoch": 9.964349376114082, "percentage": 99.64, "elapsed_time": "0:16:05", "remaining_time": "0:00:03", "throughput": 3595.94, "total_tokens": 3470408}
1138
+ {"current_steps": 5595, "total_steps": 5610, "loss": 0.1584, "lr": 1.2388975534460834e-09, "epoch": 9.973262032085561, "percentage": 99.73, "elapsed_time": "0:16:05", "remaining_time": "0:00:02", "throughput": 3596.47, "total_tokens": 3473608}
1139
+ {"current_steps": 5600, "total_steps": 5610, "loss": 0.163, "lr": 5.855752222366783e-10, "epoch": 9.982174688057041, "percentage": 99.82, "elapsed_time": "0:16:06", "remaining_time": "0:00:01", "throughput": 3596.88, "total_tokens": 3476616}
1140
+ {"current_steps": 5605, "total_steps": 5610, "loss": 0.1549, "lr": 1.7422120505705686e-10, "epoch": 9.99108734402852, "percentage": 99.91, "elapsed_time": "0:16:07", "remaining_time": "0:00:00", "throughput": 3597.38, "total_tokens": 3479624}
1141
+ {"current_steps": 5610, "total_steps": 5610, "loss": 0.1694, "lr": 4.839483383478616e-12, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:16:07", "remaining_time": "0:00:00", "throughput": 3596.97, "total_tokens": 3481336}
1142
+ {"current_steps": 5610, "total_steps": 5610, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:16:08", "remaining_time": "0:00:00", "throughput": 3593.5, "total_tokens": 3481336}