diff --git "a/run.log" "b/run.log" --- "a/run.log" +++ "b/run.log" @@ -2727,3 +2727,1154 @@ Time to load utils op: 0.0003387928009033203 seconds [2022-12-14 20:01:44,904] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-2000/global_step2000/zero_pp_rank_0_mp_rank_00_optim_states.pt. [2022-12-14 20:01:44,905] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-2000/global_step2000/zero_pp_rank_0_mp_rank_00_optim_states.pt [2022-12-14 20:01:44,905] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +[2022-12-14 20:02:46,042] [INFO] [timer.py:197:stop] 0/2001, RunningAvgSamplesPerSec=29.963263124150934, CurrSamplesPerSec=29.121789811844277, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:02:52,098] [INFO] [timer.py:197:stop] 0/2002, RunningAvgSamplesPerSec=29.962855808325042, CurrSamplesPerSec=29.170182709017393, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:02:58,352] [INFO] [timer.py:197:stop] 0/2003, RunningAvgSamplesPerSec=29.962877619986003, CurrSamplesPerSec=30.00656457808395, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:03:04,298] [INFO] [timer.py:197:stop] 0/2004, RunningAvgSamplesPerSec=29.962911277596618, CurrSamplesPerSec=30.030411956342622, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:03:10,315] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536.0, reducing to 65536.0 +[2022-12-14 20:03:10,316] [INFO] [timer.py:197:stop] 0/2005, RunningAvgSamplesPerSec=29.963727038757163, CurrSamplesPerSec=31.691075961172935, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:03:16,386] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536.0, reducing to 32768.0 +[2022-12-14 20:03:16,388] [INFO] [timer.py:197:stop] 0/2006, RunningAvgSamplesPerSec=29.964472985379775, CurrSamplesPerSec=31.53705766865486, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:03:22,580] [INFO] [timer.py:197:stop] 0/2007, RunningAvgSamplesPerSec=29.964337166815618, CurrSamplesPerSec=29.694608065246943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:03:28,463] [INFO] [timer.py:197:stop] 0/2008, RunningAvgSamplesPerSec=29.964195363942135, CurrSamplesPerSec=29.682554282099392, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:03:35,201] [INFO] [timer.py:197:stop] 0/2009, RunningAvgSamplesPerSec=29.96394657770101, CurrSamplesPerSec=29.473061403031064, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:03:41,331] [INFO] [logging.py:68:log_dist] [Rank 0] step=2010, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:03:41,331] [INFO] [timer.py:197:stop] 0/2010, RunningAvgSamplesPerSec=29.96414830727269, CurrSamplesPerSec=30.37456785041302, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:03:47,934] [INFO] [timer.py:197:stop] 0/2011, RunningAvgSamplesPerSec=29.96445283853812, CurrSamplesPerSec=30.588697214403233, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:03:54,005] [INFO] [timer.py:197:stop] 0/2012, RunningAvgSamplesPerSec=29.964177013968538, CurrSamplesPerSec=29.420111930712732, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:03:59,787] [INFO] [timer.py:197:stop] 0/2013, RunningAvgSamplesPerSec=29.964190501002285, CurrSamplesPerSec=29.991323999040493, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:04:06,151] [INFO] [timer.py:197:stop] 0/2014, RunningAvgSamplesPerSec=29.96409406659856, CurrSamplesPerSec=29.771412150832397, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:04:12,009] [INFO] [timer.py:197:stop] 0/2015, RunningAvgSamplesPerSec=29.963837648207885, CurrSamplesPerSec=29.456660698932666, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:04:18,362] [INFO] [timer.py:197:stop] 0/2016, RunningAvgSamplesPerSec=29.963725276891743, CurrSamplesPerSec=29.73921752317004, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:04:27,975] [INFO] [timer.py:197:stop] 0/2017, RunningAvgSamplesPerSec=29.963293327556524, CurrSamplesPerSec=29.117904343179685, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:04:34,024] [INFO] [timer.py:197:stop] 0/2018, RunningAvgSamplesPerSec=29.96319691645754, CurrSamplesPerSec=29.770180607045152, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:04:40,483] [INFO] [timer.py:197:stop] 0/2019, RunningAvgSamplesPerSec=29.96328114034665, CurrSamplesPerSec=30.13404466029924, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:04:46,573] [INFO] [logging.py:68:log_dist] [Rank 0] step=2020, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:04:46,574] [INFO] [timer.py:197:stop] 0/2020, RunningAvgSamplesPerSec=29.963174729355547, CurrSamplesPerSec=29.75007101281733, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:04:53,127] [INFO] [timer.py:197:stop] 0/2021, RunningAvgSamplesPerSec=29.96285736066995, CurrSamplesPerSec=29.335816858522776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:04:59,256] [INFO] [timer.py:197:stop] 0/2022, RunningAvgSamplesPerSec=29.962576541690815, CurrSamplesPerSec=29.40613757800799, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:05:05,846] [INFO] [timer.py:197:stop] 0/2023, RunningAvgSamplesPerSec=29.962740958503414, CurrSamplesPerSec=30.29858745287211, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:05:11,830] [INFO] [timer.py:197:stop] 0/2024, RunningAvgSamplesPerSec=29.962882664937837, CurrSamplesPerSec=30.25203650230771, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:05:17,999] [INFO] [timer.py:197:stop] 0/2025, RunningAvgSamplesPerSec=29.962668479737136, CurrSamplesPerSec=29.53575962563638, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0005, 'learning_rate': 1e-05, 'epoch': 50.01} +[2022-12-14 20:05:24,018] [INFO] [timer.py:197:stop] 0/2026, RunningAvgSamplesPerSec=29.963029952631555, CurrSamplesPerSec=30.712592021203992, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:05:30,193] [INFO] [timer.py:197:stop] 0/2027, RunningAvgSamplesPerSec=29.963159384437375, CurrSamplesPerSec=30.227441134654022, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:05:36,453] [INFO] [timer.py:197:stop] 0/2028, RunningAvgSamplesPerSec=29.96312062718328, CurrSamplesPerSec=29.884842325953805, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:05:42,397] [INFO] [timer.py:197:stop] 0/2029, RunningAvgSamplesPerSec=29.963273871763114, CurrSamplesPerSec=30.276999760658317, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:05:48,333] [INFO] [logging.py:68:log_dist] [Rank 0] step=2030, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:05:48,334] [INFO] [timer.py:197:stop] 0/2030, RunningAvgSamplesPerSec=29.963288641840563, CurrSamplesPerSec=29.993257548162763, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:05:54,311] [INFO] [timer.py:197:stop] 0/2031, RunningAvgSamplesPerSec=29.963302995168753, CurrSamplesPerSec=29.992439864385933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:06:00,456] [INFO] [timer.py:197:stop] 0/2032, RunningAvgSamplesPerSec=29.962844438173104, CurrSamplesPerSec=29.060466908841324, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:06:03,139] [INFO] [timer.py:197:stop] 0/2033, RunningAvgSamplesPerSec=29.96304397978002, CurrSamplesPerSec=30.37366738143982, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:06:05,292] [INFO] [timer.py:197:stop] 0/2034, RunningAvgSamplesPerSec=29.963101387366436, CurrSamplesPerSec=30.08015189590863, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:06:07,446] [INFO] [timer.py:197:stop] 0/2035, RunningAvgSamplesPerSec=29.9631674315204, CurrSamplesPerSec=30.097973229796825, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:06:09,735] [INFO] [timer.py:197:stop] 0/2036, RunningAvgSamplesPerSec=29.962292157423146, CurrSamplesPerSec=28.282660709689335, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:06:11,865] [INFO] [timer.py:197:stop] 0/2037, RunningAvgSamplesPerSec=29.96251980338938, CurrSamplesPerSec=30.432823201169082, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:06:14,035] [INFO] [timer.py:197:stop] 0/2038, RunningAvgSamplesPerSec=29.96247266239665, CurrSamplesPerSec=29.866847060551372, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:06:16,175] [INFO] [timer.py:197:stop] 0/2039, RunningAvgSamplesPerSec=29.96262316595897, CurrSamplesPerSec=30.272216155672467, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:06:18,063] [INFO] [logging.py:68:log_dist] [Rank 0] step=2040, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:06:18,063] [INFO] [timer.py:197:stop] 0/2040, RunningAvgSamplesPerSec=29.964523945656975, CurrSamplesPerSec=34.411285923733175, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:07:01,600] [INFO] [timer.py:197:stop] 0/2041, RunningAvgSamplesPerSec=29.964195929235284, CurrSamplesPerSec=29.31029408842285, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:07:08,596] [INFO] [timer.py:197:stop] 0/2042, RunningAvgSamplesPerSec=29.96421963161301, CurrSamplesPerSec=30.012626893933465, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:07:15,912] [INFO] [timer.py:197:stop] 0/2043, RunningAvgSamplesPerSec=29.964446361742795, CurrSamplesPerSec=30.434230948975884, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:07:22,644] [INFO] [timer.py:197:stop] 0/2044, RunningAvgSamplesPerSec=29.96368760726679, CurrSamplesPerSec=28.491209330296655, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:07:29,265] [INFO] [timer.py:197:stop] 0/2045, RunningAvgSamplesPerSec=29.96345413231927, CurrSamplesPerSec=29.494168858842897, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:07:36,386] [INFO] [timer.py:197:stop] 0/2046, RunningAvgSamplesPerSec=29.96328284057931, CurrSamplesPerSec=29.617375732958052, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:07:42,971] [INFO] [timer.py:197:stop] 0/2047, RunningAvgSamplesPerSec=29.96325925740244, CurrSamplesPerSec=29.915132706350096, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:07:49,448] [INFO] [timer.py:197:stop] 0/2048, RunningAvgSamplesPerSec=29.963186292558525, CurrSamplesPerSec=29.81471292801342, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:07:56,574] [INFO] [timer.py:197:stop] 0/2049, RunningAvgSamplesPerSec=29.963112020377043, CurrSamplesPerSec=29.811918305698388, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:08:03,224] [INFO] [logging.py:68:log_dist] [Rank 0] step=2050, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:08:03,225] [INFO] [timer.py:197:stop] 0/2050, RunningAvgSamplesPerSec=29.96323378454745, CurrSamplesPerSec=30.214576877893517, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 51.0} +[2022-12-14 20:08:10,150] [INFO] [timer.py:197:stop] 0/2051, RunningAvgSamplesPerSec=29.963225037045945, CurrSamplesPerSec=29.94532086401143, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:08:16,641] [INFO] [timer.py:197:stop] 0/2052, RunningAvgSamplesPerSec=29.96340496094874, CurrSamplesPerSec=30.336663785214956, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:08:23,119] [INFO] [timer.py:197:stop] 0/2053, RunningAvgSamplesPerSec=29.963116728777806, CurrSamplesPerSec=29.38367304538729, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:08:29,819] [INFO] [timer.py:197:stop] 0/2054, RunningAvgSamplesPerSec=29.96293197266961, CurrSamplesPerSec=29.588731929861922, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:08:36,336] [INFO] [timer.py:197:stop] 0/2055, RunningAvgSamplesPerSec=29.96312657480394, CurrSamplesPerSec=30.367846553574577, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:08:43,291] [INFO] [timer.py:197:stop] 0/2056, RunningAvgSamplesPerSec=29.962938178623467, CurrSamplesPerSec=29.58109228853139, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:08:50,174] [INFO] [timer.py:197:stop] 0/2057, RunningAvgSamplesPerSec=29.96320717560363, CurrSamplesPerSec=30.52611195780948, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:08:57,087] [INFO] [timer.py:197:stop] 0/2058, RunningAvgSamplesPerSec=29.962949329044893, CurrSamplesPerSec=29.44228670103925, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:09:03,910] [INFO] [timer.py:197:stop] 0/2059, RunningAvgSamplesPerSec=29.96296739283706, CurrSamplesPerSec=30.000152663173296, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:09:13,433] [INFO] [logging.py:68:log_dist] [Rank 0] step=2060, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:09:13,434] [INFO] [timer.py:197:stop] 0/2060, RunningAvgSamplesPerSec=29.962526051265147, CurrSamplesPerSec=29.08139689704346, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:09:20,200] [INFO] [timer.py:197:stop] 0/2061, RunningAvgSamplesPerSec=29.962438159436974, CurrSamplesPerSec=29.782642718401146, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:09:26,703] [INFO] [timer.py:197:stop] 0/2062, RunningAvgSamplesPerSec=29.96215734504799, CurrSamplesPerSec=29.394912285735124, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:09:33,614] [INFO] [timer.py:197:stop] 0/2063, RunningAvgSamplesPerSec=29.962470346674323, CurrSamplesPerSec=30.62144144057818, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:09:39,806] [INFO] [timer.py:197:stop] 0/2064, RunningAvgSamplesPerSec=29.96225362250751, CurrSamplesPerSec=29.52214924532701, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:09:46,824] [INFO] [timer.py:197:stop] 0/2065, RunningAvgSamplesPerSec=29.962111352211597, CurrSamplesPerSec=29.67159584135221, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:09:53,276] [INFO] [timer.py:197:stop] 0/2066, RunningAvgSamplesPerSec=29.96204096098847, CurrSamplesPerSec=29.81752463480856, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:00,418] [INFO] [timer.py:197:stop] 0/2067, RunningAvgSamplesPerSec=29.962049618230083, CurrSamplesPerSec=29.979928832761008, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:07,242] [INFO] [timer.py:197:stop] 0/2068, RunningAvgSamplesPerSec=29.961979417586665, CurrSamplesPerSec=29.817713425640513, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:14,630] [INFO] [timer.py:197:stop] 0/2069, RunningAvgSamplesPerSec=29.962016036324915, CurrSamplesPerSec=30.037861953747356, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:21,844] [INFO] [logging.py:68:log_dist] [Rank 0] step=2070, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:10:21,844] [INFO] [timer.py:197:stop] 0/2070, RunningAvgSamplesPerSec=29.961847151397908, CurrSamplesPerSec=29.61678427596196, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:28,918] [INFO] [timer.py:197:stop] 0/2071, RunningAvgSamplesPerSec=29.96199455652936, CurrSamplesPerSec=30.26996316099813, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:35,652] [INFO] [timer.py:197:stop] 0/2072, RunningAvgSamplesPerSec=29.96180418058848, CurrSamplesPerSec=29.5730297863752, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:38,275] [INFO] [timer.py:197:stop] 0/2073, RunningAvgSamplesPerSec=29.96175873467013, CurrSamplesPerSec=29.8679802688186, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:40,431] [INFO] [timer.py:197:stop] 0/2074, RunningAvgSamplesPerSec=29.96185484414453, CurrSamplesPerSec=30.1622293370512, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:42,569] [INFO] [timer.py:197:stop] 0/2075, RunningAvgSamplesPerSec=29.962015165691994, CurrSamplesPerSec=30.29792743950188, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 51.01} +[2022-12-14 20:10:44,681] [INFO] [timer.py:197:stop] 0/2076, RunningAvgSamplesPerSec=29.962354760835225, CurrSamplesPerSec=30.683282220334934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:46,818] [INFO] [timer.py:197:stop] 0/2077, RunningAvgSamplesPerSec=29.96251981442217, CurrSamplesPerSec=30.308799094559543, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:48,910] [INFO] [timer.py:197:stop] 0/2078, RunningAvgSamplesPerSec=29.962986530565058, CurrSamplesPerSec=30.963785000373733, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:51,053] [INFO] [timer.py:197:stop] 0/2079, RunningAvgSamplesPerSec=29.96311254114569, CurrSamplesPerSec=30.22701566676392, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:10:52,987] [INFO] [logging.py:68:log_dist] [Rank 0] step=2080, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:10:52,988] [INFO] [timer.py:197:stop] 0/2080, RunningAvgSamplesPerSec=29.96466719201001, CurrSamplesPerSec=33.58387246219717, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:11:31,544] [INFO] [timer.py:197:stop] 0/2081, RunningAvgSamplesPerSec=29.964887081927436, CurrSamplesPerSec=30.428897377021826, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:11:37,791] [INFO] [timer.py:197:stop] 0/2082, RunningAvgSamplesPerSec=29.965129107384865, CurrSamplesPerSec=30.476897725365852, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:11:44,001] [INFO] [timer.py:197:stop] 0/2083, RunningAvgSamplesPerSec=29.964841533064405, CurrSamplesPerSec=29.378399069033513, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:11:50,379] [INFO] [timer.py:197:stop] 0/2084, RunningAvgSamplesPerSec=29.964439496113105, CurrSamplesPerSec=29.15053655313721, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:11:56,311] [INFO] [timer.py:197:stop] 0/2085, RunningAvgSamplesPerSec=29.96390608741792, CurrSamplesPerSec=28.89305728249975, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:12:02,415] [INFO] [timer.py:197:stop] 0/2086, RunningAvgSamplesPerSec=29.964024254814014, CurrSamplesPerSec=30.212206640405178, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:12:08,595] [INFO] [timer.py:197:stop] 0/2087, RunningAvgSamplesPerSec=29.9640937893197, CurrSamplesPerSec=30.109708245852104, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:12:14,442] [INFO] [timer.py:197:stop] 0/2088, RunningAvgSamplesPerSec=29.96403116906517, CurrSamplesPerSec=29.83403464803212, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:12:20,864] [INFO] [timer.py:197:stop] 0/2089, RunningAvgSamplesPerSec=29.96366104613535, CurrSamplesPerSec=29.210988105276687, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:12:26,887] [INFO] [logging.py:68:log_dist] [Rank 0] step=2090, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:12:26,888] [INFO] [timer.py:197:stop] 0/2090, RunningAvgSamplesPerSec=29.963716766583982, CurrSamplesPerSec=30.080458632786172, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:12:33,235] [INFO] [timer.py:197:stop] 0/2091, RunningAvgSamplesPerSec=29.963559747137726, CurrSamplesPerSec=29.63925335288412, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:12:39,452] [INFO] [timer.py:197:stop] 0/2092, RunningAvgSamplesPerSec=29.963254182270067, CurrSamplesPerSec=29.33825037501332, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:12:45,268] [INFO] [timer.py:197:stop] 0/2093, RunningAvgSamplesPerSec=29.96349703275905, CurrSamplesPerSec=30.479804573501468, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:12:51,509] [INFO] [timer.py:197:stop] 0/2094, RunningAvgSamplesPerSec=29.96335678890582, CurrSamplesPerSec=29.67295044365371, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:12:57,570] [INFO] [timer.py:197:stop] 0/2095, RunningAvgSamplesPerSec=29.96318935037387, CurrSamplesPerSec=29.616957462581258, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:13:03,604] [INFO] [timer.py:197:stop] 0/2096, RunningAvgSamplesPerSec=29.96314648512006, CurrSamplesPerSec=29.873697469050473, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:13:09,496] [INFO] [timer.py:197:stop] 0/2097, RunningAvgSamplesPerSec=29.963315807111485, CurrSamplesPerSec=30.322123912998293, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:13:15,404] [INFO] [timer.py:197:stop] 0/2098, RunningAvgSamplesPerSec=29.96297766727819, CurrSamplesPerSec=29.270944007195077, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:13:22,997] [INFO] [timer.py:197:stop] 0/2099, RunningAvgSamplesPerSec=29.963037194425244, CurrSamplesPerSec=30.08832806651646, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:13:29,618] [INFO] [logging.py:68:log_dist] [Rank 0] step=2100, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:13:29,619] [INFO] [timer.py:197:stop] 0/2100, RunningAvgSamplesPerSec=29.962865115585565, CurrSamplesPerSec=29.606311884900922, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 52.0} +[2022-12-14 20:13:35,825] [INFO] [timer.py:197:stop] 0/2101, RunningAvgSamplesPerSec=29.96292556493461, CurrSamplesPerSec=30.090287635915256, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:13:42,396] [INFO] [timer.py:197:stop] 0/2102, RunningAvgSamplesPerSec=29.962660929902892, CurrSamplesPerSec=29.4173069711343, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:13:49,013] [INFO] [timer.py:197:stop] 0/2103, RunningAvgSamplesPerSec=29.962658617339475, CurrSamplesPerSec=29.957803021544002, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:13:55,168] [INFO] [timer.py:197:stop] 0/2104, RunningAvgSamplesPerSec=29.96286283664626, CurrSamplesPerSec=30.398164030336623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:01,537] [INFO] [timer.py:197:stop] 0/2105, RunningAvgSamplesPerSec=29.96273728350441, CurrSamplesPerSec=29.701129921186226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:07,644] [INFO] [timer.py:197:stop] 0/2106, RunningAvgSamplesPerSec=29.962703219863464, CurrSamplesPerSec=29.8912383248597, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:13,835] [INFO] [timer.py:197:stop] 0/2107, RunningAvgSamplesPerSec=29.962941252347992, CurrSamplesPerSec=30.47227905379043, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:20,203] [INFO] [timer.py:197:stop] 0/2108, RunningAvgSamplesPerSec=29.962578384146425, CurrSamplesPerSec=29.217738092568812, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:26,176] [INFO] [timer.py:197:stop] 0/2109, RunningAvgSamplesPerSec=29.962731179665802, CurrSamplesPerSec=30.2880136011776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:32,341] [INFO] [logging.py:68:log_dist] [Rank 0] step=2110, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:14:32,341] [INFO] [timer.py:197:stop] 0/2110, RunningAvgSamplesPerSec=29.962611164733058, CurrSamplesPerSec=29.711856962724735, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:38,375] [INFO] [timer.py:197:stop] 0/2111, RunningAvgSamplesPerSec=29.962508665204215, CurrSamplesPerSec=29.747987372295317, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:44,137] [INFO] [timer.py:197:stop] 0/2112, RunningAvgSamplesPerSec=29.962719781094414, CurrSamplesPerSec=30.41468250483721, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:46,643] [INFO] [timer.py:197:stop] 0/2113, RunningAvgSamplesPerSec=29.962651144516066, CurrSamplesPerSec=29.818524922228253, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:48,818] [INFO] [timer.py:197:stop] 0/2114, RunningAvgSamplesPerSec=29.962563298604742, CurrSamplesPerSec=29.778261789341745, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:50,931] [INFO] [timer.py:197:stop] 0/2115, RunningAvgSamplesPerSec=29.962880621644437, CurrSamplesPerSec=30.648407469505493, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:53,040] [INFO] [timer.py:197:stop] 0/2116, RunningAvgSamplesPerSec=29.963228144437707, CurrSamplesPerSec=30.716000911288823, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:55,209] [INFO] [timer.py:197:stop] 0/2117, RunningAvgSamplesPerSec=29.963178032651694, CurrSamplesPerSec=29.857615116546665, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:57,362] [INFO] [timer.py:197:stop] 0/2118, RunningAvgSamplesPerSec=29.963234846577567, CurrSamplesPerSec=30.083880352800097, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:14:59,536] [INFO] [timer.py:197:stop] 0/2119, RunningAvgSamplesPerSec=29.96315496725, CurrSamplesPerSec=29.795078889367524, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:15:01,451] [INFO] [logging.py:68:log_dist] [Rank 0] step=2120, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:15:01,452] [INFO] [timer.py:197:stop] 0/2120, RunningAvgSamplesPerSec=29.964769869726073, CurrSamplesPerSec=33.82403918836556, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:15:43,989] [INFO] [timer.py:197:stop] 0/2121, RunningAvgSamplesPerSec=29.964526964838356, CurrSamplesPerSec=29.458742521469173, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:15:51,109] [INFO] [timer.py:197:stop] 0/2122, RunningAvgSamplesPerSec=29.964396679744702, CurrSamplesPerSec=29.690844106272102, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:15:57,757] [INFO] [timer.py:197:stop] 0/2123, RunningAvgSamplesPerSec=29.96431499356518, CurrSamplesPerSec=29.792135848401585, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:16:04,498] [INFO] [timer.py:197:stop] 0/2124, RunningAvgSamplesPerSec=29.96438174871421, CurrSamplesPerSec=30.106641944497593, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:16:11,068] [INFO] [timer.py:197:stop] 0/2125, RunningAvgSamplesPerSec=29.964207661887368, CurrSamplesPerSec=29.599296326676612, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 53.0} +[2022-12-14 20:16:18,068] [INFO] [timer.py:197:stop] 0/2126, RunningAvgSamplesPerSec=29.96437397005689, CurrSamplesPerSec=30.321658100009397, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:16:25,032] [INFO] [timer.py:197:stop] 0/2127, RunningAvgSamplesPerSec=29.96424731517256, CurrSamplesPerSec=29.697627147745983, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:16:31,384] [INFO] [timer.py:197:stop] 0/2128, RunningAvgSamplesPerSec=29.96425416820303, CurrSamplesPerSec=29.978823942232943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:16:38,541] [INFO] [timer.py:197:stop] 0/2129, RunningAvgSamplesPerSec=29.963783332478894, CurrSamplesPerSec=28.99516047846497, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:16:44,880] [INFO] [logging.py:68:log_dist] [Rank 0] step=2130, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:16:44,881] [INFO] [timer.py:197:stop] 0/2130, RunningAvgSamplesPerSec=29.963547900653275, CurrSamplesPerSec=29.47101961858515, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:16:52,184] [INFO] [timer.py:197:stop] 0/2131, RunningAvgSamplesPerSec=29.963706735563495, CurrSamplesPerSec=30.305565518718385, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:16:58,778] [INFO] [timer.py:197:stop] 0/2132, RunningAvgSamplesPerSec=29.96395066729938, CurrSamplesPerSec=30.49244542669756, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:17:05,288] [INFO] [timer.py:197:stop] 0/2133, RunningAvgSamplesPerSec=29.96392163161897, CurrSamplesPerSec=29.902203080210754, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:17:11,818] [INFO] [timer.py:197:stop] 0/2134, RunningAvgSamplesPerSec=29.963946966789095, CurrSamplesPerSec=30.018033713940206, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:17:17,977] [INFO] [timer.py:197:stop] 0/2135, RunningAvgSamplesPerSec=29.963831319551534, CurrSamplesPerSec=29.71928462597729, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:17:24,688] [INFO] [timer.py:197:stop] 0/2136, RunningAvgSamplesPerSec=29.96402935029227, CurrSamplesPerSec=30.39247145222972, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:17:30,770] [INFO] [timer.py:197:stop] 0/2137, RunningAvgSamplesPerSec=29.964295638402355, CurrSamplesPerSec=30.543544801996674, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:17:37,258] [INFO] [timer.py:197:stop] 0/2138, RunningAvgSamplesPerSec=29.964259999040294, CurrSamplesPerSec=29.8883627816556, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:17:43,857] [INFO] [timer.py:197:stop] 0/2139, RunningAvgSamplesPerSec=29.963938598822725, CurrSamplesPerSec=29.2928112888575, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:17:50,362] [INFO] [logging.py:68:log_dist] [Rank 0] step=2140, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:17:50,362] [INFO] [timer.py:197:stop] 0/2140, RunningAvgSamplesPerSec=29.963905515072412, CurrSamplesPerSec=29.89337204258686, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:17:56,920] [INFO] [timer.py:197:stop] 0/2141, RunningAvgSamplesPerSec=29.963827999426904, CurrSamplesPerSec=29.79901156723874, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:18:03,756] [INFO] [timer.py:197:stop] 0/2142, RunningAvgSamplesPerSec=29.963786744681908, CurrSamplesPerSec=29.875802082704976, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:18:10,672] [INFO] [timer.py:197:stop] 0/2143, RunningAvgSamplesPerSec=29.96394481150726, CurrSamplesPerSec=30.30607189561005, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:18:17,273] [INFO] [timer.py:197:stop] 0/2144, RunningAvgSamplesPerSec=29.96421371049948, CurrSamplesPerSec=30.551209879803015, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:18:24,171] [INFO] [timer.py:197:stop] 0/2145, RunningAvgSamplesPerSec=29.964164614388363, CurrSamplesPerSec=29.85936871349482, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:18:30,851] [INFO] [timer.py:197:stop] 0/2146, RunningAvgSamplesPerSec=29.96397071905795, CurrSamplesPerSec=29.554138915612057, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:18:37,912] [INFO] [timer.py:197:stop] 0/2147, RunningAvgSamplesPerSec=29.96381237324194, CurrSamplesPerSec=29.628124104251174, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:18:44,468] [INFO] [timer.py:197:stop] 0/2148, RunningAvgSamplesPerSec=29.964007478546613, CurrSamplesPerSec=30.388439065812506, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:18:50,749] [INFO] [timer.py:197:stop] 0/2149, RunningAvgSamplesPerSec=29.96418245290787, CurrSamplesPerSec=30.344444896611908, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:18:57,930] [INFO] [logging.py:68:log_dist] [Rank 0] step=2150, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:18:57,931] [INFO] [timer.py:197:stop] 0/2150, RunningAvgSamplesPerSec=29.964090874498563, CurrSamplesPerSec=29.768754388758744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 53.01} +[2022-12-14 20:19:04,318] [INFO] [timer.py:197:stop] 0/2151, RunningAvgSamplesPerSec=29.96402938321466, CurrSamplesPerSec=29.83252605000178, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:19:11,236] [INFO] [timer.py:197:stop] 0/2152, RunningAvgSamplesPerSec=29.963967848264762, CurrSamplesPerSec=29.832310548451932, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:19:13,656] [INFO] [timer.py:197:stop] 0/2153, RunningAvgSamplesPerSec=29.96424697257075, CurrSamplesPerSec=30.576634699850917, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:19:15,772] [INFO] [timer.py:197:stop] 0/2154, RunningAvgSamplesPerSec=29.96454530396617, CurrSamplesPerSec=30.620306224215295, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:19:17,920] [INFO] [timer.py:197:stop] 0/2155, RunningAvgSamplesPerSec=29.96464254364827, CurrSamplesPerSec=30.17537468334936, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:19:20,070] [INFO] [timer.py:197:stop] 0/2156, RunningAvgSamplesPerSec=29.964852513297714, CurrSamplesPerSec=30.42384497312934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:19:22,277] [INFO] [timer.py:197:stop] 0/2157, RunningAvgSamplesPerSec=29.96475107513967, CurrSamplesPerSec=29.747835726310402, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:19:24,455] [INFO] [timer.py:197:stop] 0/2158, RunningAvgSamplesPerSec=29.964658756386896, CurrSamplesPerSec=29.767024622195017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:19:26,606] [INFO] [timer.py:197:stop] 0/2159, RunningAvgSamplesPerSec=29.964733481701373, CurrSamplesPerSec=30.126712557072143, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:19:28,511] [INFO] [logging.py:68:log_dist] [Rank 0] step=2160, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:19:28,512] [INFO] [timer.py:197:stop] 0/2160, RunningAvgSamplesPerSec=29.966417532165064, CurrSamplesPerSec=34.100244780487074, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:20:07,705] [INFO] [timer.py:197:stop] 0/2161, RunningAvgSamplesPerSec=29.966621362130883, CurrSamplesPerSec=30.41304225415623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:20:14,145] [INFO] [timer.py:197:stop] 0/2162, RunningAvgSamplesPerSec=29.966086137168052, CurrSamplesPerSec=28.85346033484941, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:20:20,468] [INFO] [timer.py:197:stop] 0/2163, RunningAvgSamplesPerSec=29.96559355348362, CurrSamplesPerSec=28.938112205753416, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:20:26,746] [INFO] [timer.py:197:stop] 0/2164, RunningAvgSamplesPerSec=29.965579483810863, CurrSamplesPerSec=29.935205753937453, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:20:32,573] [INFO] [timer.py:197:stop] 0/2165, RunningAvgSamplesPerSec=29.965766336782636, CurrSamplesPerSec=30.37526557873306, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:20:38,509] [INFO] [timer.py:197:stop] 0/2166, RunningAvgSamplesPerSec=29.965949055906098, CurrSamplesPerSec=30.366455245711816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:20:44,534] [INFO] [timer.py:197:stop] 0/2167, RunningAvgSamplesPerSec=29.965566050894708, CurrSamplesPerSec=29.15906078513545, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:20:50,504] [INFO] [timer.py:197:stop] 0/2168, RunningAvgSamplesPerSec=29.965732493964378, CurrSamplesPerSec=30.330469870364535, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:20:56,742] [INFO] [timer.py:197:stop] 0/2169, RunningAvgSamplesPerSec=29.96579780302747, CurrSamplesPerSec=30.10792849930118, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:21:02,744] [INFO] [logging.py:68:log_dist] [Rank 0] step=2170, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:21:02,745] [INFO] [timer.py:197:stop] 0/2170, RunningAvgSamplesPerSec=29.96577293586929, CurrSamplesPerSec=29.91198257935156, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:21:10,840] [INFO] [timer.py:197:stop] 0/2171, RunningAvgSamplesPerSec=29.965803988585385, CurrSamplesPerSec=30.033277936489572, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:21:16,896] [INFO] [timer.py:197:stop] 0/2172, RunningAvgSamplesPerSec=29.96575113964789, CurrSamplesPerSec=29.851558820654418, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:21:23,200] [INFO] [timer.py:197:stop] 0/2173, RunningAvgSamplesPerSec=29.965612424260367, CurrSamplesPerSec=29.667595074901335, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:21:29,131] [INFO] [timer.py:197:stop] 0/2174, RunningAvgSamplesPerSec=29.96561951761288, CurrSamplesPerSec=29.981027107689933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:21:35,104] [INFO] [timer.py:197:stop] 0/2175, RunningAvgSamplesPerSec=29.965847104426178, CurrSamplesPerSec=30.468460625453943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 54.0} +[2022-12-14 20:21:42,201] [INFO] [timer.py:197:stop] 0/2176, RunningAvgSamplesPerSec=29.96511941842439, CurrSamplesPerSec=28.463153090860857, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:21:48,824] [INFO] [timer.py:197:stop] 0/2177, RunningAvgSamplesPerSec=29.964948881601543, CurrSamplesPerSec=29.598734964555124, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:21:55,104] [INFO] [timer.py:197:stop] 0/2178, RunningAvgSamplesPerSec=29.965247636698166, CurrSamplesPerSec=30.629449726299715, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:22:01,004] [INFO] [timer.py:197:stop] 0/2179, RunningAvgSamplesPerSec=29.96506903737993, CurrSamplesPerSec=29.58141501045296, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:22:07,381] [INFO] [logging.py:68:log_dist] [Rank 0] step=2180, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:22:07,382] [INFO] [timer.py:197:stop] 0/2180, RunningAvgSamplesPerSec=29.965127998516913, CurrSamplesPerSec=30.09403884891828, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:22:13,848] [INFO] [timer.py:197:stop] 0/2181, RunningAvgSamplesPerSec=29.96502090703704, CurrSamplesPerSec=29.733578023275083, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:22:19,762] [INFO] [timer.py:197:stop] 0/2182, RunningAvgSamplesPerSec=29.964764116449448, CurrSamplesPerSec=29.41547919913238, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:22:26,063] [INFO] [timer.py:197:stop] 0/2183, RunningAvgSamplesPerSec=29.965034546431582, CurrSamplesPerSec=30.566408879211565, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:22:32,107] [INFO] [timer.py:197:stop] 0/2184, RunningAvgSamplesPerSec=29.96486326288516, CurrSamplesPerSec=29.59589584282201, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:22:38,068] [INFO] [timer.py:197:stop] 0/2185, RunningAvgSamplesPerSec=29.964918676617472, CurrSamplesPerSec=30.086321543085454, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:22:43,989] [INFO] [timer.py:197:stop] 0/2186, RunningAvgSamplesPerSec=29.964655627864534, CurrSamplesPerSec=29.401222640157073, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:22:50,262] [INFO] [timer.py:197:stop] 0/2187, RunningAvgSamplesPerSec=29.96445390830484, CurrSamplesPerSec=29.53028473049199, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:22:57,380] [INFO] [timer.py:197:stop] 0/2188, RunningAvgSamplesPerSec=29.96429900250537, CurrSamplesPerSec=29.629612099824033, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:23:03,388] [INFO] [timer.py:197:stop] 0/2189, RunningAvgSamplesPerSec=29.963942802235994, CurrSamplesPerSec=29.20501968412916, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:23:09,527] [INFO] [logging.py:68:log_dist] [Rank 0] step=2190, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:23:09,527] [INFO] [timer.py:197:stop] 0/2190, RunningAvgSamplesPerSec=29.963800872928466, CurrSamplesPerSec=29.656585422332878, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:23:15,774] [INFO] [timer.py:197:stop] 0/2191, RunningAvgSamplesPerSec=29.96396557331264, CurrSamplesPerSec=30.328718757156786, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:23:21,484] [INFO] [timer.py:197:stop] 0/2192, RunningAvgSamplesPerSec=29.964170489695334, CurrSamplesPerSec=30.419552610602512, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:23:23,846] [INFO] [timer.py:197:stop] 0/2193, RunningAvgSamplesPerSec=29.964128161824835, CurrSamplesPerSec=29.87171614489875, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:23:25,981] [INFO] [timer.py:197:stop] 0/2194, RunningAvgSamplesPerSec=29.96430652636085, CurrSamplesPerSec=30.360269763548036, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:23:28,168] [INFO] [timer.py:197:stop] 0/2195, RunningAvgSamplesPerSec=29.96420736235743, CurrSamplesPerSec=29.74840605510663, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:23:30,308] [INFO] [timer.py:197:stop] 0/2196, RunningAvgSamplesPerSec=29.96434703343757, CurrSamplesPerSec=30.27381052270687, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:23:32,483] [INFO] [timer.py:197:stop] 0/2197, RunningAvgSamplesPerSec=29.964427307404105, CurrSamplesPerSec=30.141590169839574, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:23:34,603] [INFO] [timer.py:197:stop] 0/2198, RunningAvgSamplesPerSec=29.964695198242406, CurrSamplesPerSec=30.564491211706066, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:23:36,742] [INFO] [timer.py:197:stop] 0/2199, RunningAvgSamplesPerSec=29.96483754412884, CurrSamplesPerSec=30.280725942044434, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:23:38,626] [INFO] [logging.py:68:log_dist] [Rank 0] step=2200, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:23:38,627] [INFO] [timer.py:197:stop] 0/2200, RunningAvgSamplesPerSec=29.966615935326324, CurrSamplesPerSec=34.45984725674741, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 54.01} +[2022-12-14 20:24:16,655] [INFO] [timer.py:197:stop] 0/2201, RunningAvgSamplesPerSec=29.966397960968987, CurrSamplesPerSec=29.49483320754269, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:24:22,939] [INFO] [timer.py:197:stop] 0/2202, RunningAvgSamplesPerSec=29.96592055200626, CurrSamplesPerSec=28.95164828954382, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:24:29,180] [INFO] [timer.py:197:stop] 0/2203, RunningAvgSamplesPerSec=29.966051950620333, CurrSamplesPerSec=30.257946029614377, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:24:35,035] [INFO] [timer.py:197:stop] 0/2204, RunningAvgSamplesPerSec=29.965926029070875, CurrSamplesPerSec=29.691313727930094, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:24:41,345] [INFO] [timer.py:197:stop] 0/2205, RunningAvgSamplesPerSec=29.96623412467606, CurrSamplesPerSec=30.660383108223254, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:24:47,326] [INFO] [timer.py:197:stop] 0/2206, RunningAvgSamplesPerSec=29.966400519770307, CurrSamplesPerSec=30.337510631983402, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:24:53,474] [INFO] [timer.py:197:stop] 0/2207, RunningAvgSamplesPerSec=29.96644558020851, CurrSamplesPerSec=30.066089169349183, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:24:59,330] [INFO] [timer.py:197:stop] 0/2208, RunningAvgSamplesPerSec=29.96653161069774, CurrSamplesPerSec=30.15743788208606, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:25:05,551] [INFO] [timer.py:197:stop] 0/2209, RunningAvgSamplesPerSec=29.966010804274383, CurrSamplesPerSec=28.859552744617446, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:25:11,537] [INFO] [logging.py:68:log_dist] [Rank 0] step=2210, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:25:11,537] [INFO] [timer.py:197:stop] 0/2210, RunningAvgSamplesPerSec=29.965659095616566, CurrSamplesPerSec=29.2090460349496, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:25:17,867] [INFO] [timer.py:197:stop] 0/2211, RunningAvgSamplesPerSec=29.965536431483972, CurrSamplesPerSec=29.697121186860606, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:25:23,947] [INFO] [timer.py:197:stop] 0/2212, RunningAvgSamplesPerSec=29.96533587970355, CurrSamplesPerSec=29.528774211716666, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:25:29,787] [INFO] [timer.py:197:stop] 0/2213, RunningAvgSamplesPerSec=29.965559163145407, CurrSamplesPerSec=30.46728139491754, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:25:35,657] [INFO] [timer.py:197:stop] 0/2214, RunningAvgSamplesPerSec=29.96544326534961, CurrSamplesPerSec=29.711366959634734, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:25:41,421] [INFO] [timer.py:197:stop] 0/2215, RunningAvgSamplesPerSec=29.965518484940738, CurrSamplesPerSec=30.1328336703185, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:25:47,522] [INFO] [timer.py:197:stop] 0/2216, RunningAvgSamplesPerSec=29.965360878036712, CurrSamplesPerSec=29.6205915810286, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:25:53,388] [INFO] [timer.py:197:stop] 0/2217, RunningAvgSamplesPerSec=29.965007984495923, CurrSamplesPerSec=29.203564496593895, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:25:59,082] [INFO] [timer.py:197:stop] 0/2218, RunningAvgSamplesPerSec=29.96526139712323, CurrSamplesPerSec=30.53729044646925, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:26:05,029] [INFO] [timer.py:197:stop] 0/2219, RunningAvgSamplesPerSec=29.965091252258485, CurrSamplesPerSec=29.592737552049165, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:26:10,755] [INFO] [logging.py:68:log_dist] [Rank 0] step=2220, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:26:10,755] [INFO] [timer.py:197:stop] 0/2220, RunningAvgSamplesPerSec=29.965299465747893, CurrSamplesPerSec=30.434134334670038, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:26:16,752] [INFO] [timer.py:197:stop] 0/2221, RunningAvgSamplesPerSec=29.96531320811506, CurrSamplesPerSec=29.995824828748027, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:26:22,509] [INFO] [timer.py:197:stop] 0/2222, RunningAvgSamplesPerSec=29.96567055502687, CurrSamplesPerSec=30.780186828485157, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:26:28,565] [INFO] [timer.py:197:stop] 0/2223, RunningAvgSamplesPerSec=29.965944675010928, CurrSamplesPerSec=30.58711133206481, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:26:34,582] [INFO] [timer.py:197:stop] 0/2224, RunningAvgSamplesPerSec=29.965680704478427, CurrSamplesPerSec=29.390657545525414, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:26:40,543] [INFO] [timer.py:197:stop] 0/2225, RunningAvgSamplesPerSec=29.96562060783633, CurrSamplesPerSec=29.832678560672253, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 55.01} +[2022-12-14 20:26:46,242] [INFO] [timer.py:197:stop] 0/2226, RunningAvgSamplesPerSec=29.965901542174205, CurrSamplesPerSec=30.603717222064827, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:26:52,700] [INFO] [timer.py:197:stop] 0/2227, RunningAvgSamplesPerSec=29.9656754142019, CurrSamplesPerSec=29.471071387753707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:26:58,625] [INFO] [timer.py:197:stop] 0/2228, RunningAvgSamplesPerSec=29.965965938190088, CurrSamplesPerSec=30.626640060149896, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:27:04,486] [INFO] [timer.py:197:stop] 0/2229, RunningAvgSamplesPerSec=29.96619238191353, CurrSamplesPerSec=30.478884011599522, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:27:10,956] [INFO] [logging.py:68:log_dist] [Rank 0] step=2230, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:27:10,956] [INFO] [timer.py:197:stop] 0/2230, RunningAvgSamplesPerSec=29.9660938748794, CurrSamplesPerSec=29.7483137460725, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:27:17,174] [INFO] [timer.py:197:stop] 0/2231, RunningAvgSamplesPerSec=29.966110321489722, CurrSamplesPerSec=30.00279825202017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:27:23,424] [INFO] [timer.py:197:stop] 0/2232, RunningAvgSamplesPerSec=29.96606847359631, CurrSamplesPerSec=29.873079108132607, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:27:25,903] [INFO] [timer.py:197:stop] 0/2233, RunningAvgSamplesPerSec=29.966288491674884, CurrSamplesPerSec=30.46509954041682, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:27:28,072] [INFO] [timer.py:197:stop] 0/2234, RunningAvgSamplesPerSec=29.96624442626348, CurrSamplesPerSec=29.86825610671751, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:27:30,244] [INFO] [timer.py:197:stop] 0/2235, RunningAvgSamplesPerSec=29.966177469369057, CurrSamplesPerSec=29.817471641320534, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:27:32,358] [INFO] [timer.py:197:stop] 0/2236, RunningAvgSamplesPerSec=29.966471284147197, CurrSamplesPerSec=30.63725239775597, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:27:34,529] [INFO] [timer.py:197:stop] 0/2237, RunningAvgSamplesPerSec=29.966409108708582, CurrSamplesPerSec=29.82815032113252, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:27:36,667] [INFO] [timer.py:197:stop] 0/2238, RunningAvgSamplesPerSec=29.96655462457435, CurrSamplesPerSec=30.295352636357634, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:27:38,807] [INFO] [timer.py:197:stop] 0/2239, RunningAvgSamplesPerSec=29.966688469871553, CurrSamplesPerSec=30.268986967449237, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:27:40,694] [INFO] [logging.py:68:log_dist] [Rank 0] step=2240, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:27:40,695] [INFO] [timer.py:197:stop] 0/2240, RunningAvgSamplesPerSec=29.968389048769474, CurrSamplesPerSec=34.32598576350269, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:28:19,604] [INFO] [timer.py:197:stop] 0/2241, RunningAvgSamplesPerSec=29.968132470358483, CurrSamplesPerSec=29.404710595196953, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:28:25,799] [INFO] [timer.py:197:stop] 0/2242, RunningAvgSamplesPerSec=29.967943129871063, CurrSamplesPerSec=29.54992578818474, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:28:32,121] [INFO] [timer.py:197:stop] 0/2243, RunningAvgSamplesPerSec=29.967558350664884, CurrSamplesPerSec=29.129759845784587, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:28:38,083] [INFO] [timer.py:197:stop] 0/2244, RunningAvgSamplesPerSec=29.967469330873392, CurrSamplesPerSec=29.7692958073866, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:28:44,307] [INFO] [timer.py:197:stop] 0/2245, RunningAvgSamplesPerSec=29.967567731253453, CurrSamplesPerSec=30.189818265329176, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:28:50,437] [INFO] [timer.py:197:stop] 0/2246, RunningAvgSamplesPerSec=29.96765057072747, CurrSamplesPerSec=30.154619293172026, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:28:56,398] [INFO] [timer.py:197:stop] 0/2247, RunningAvgSamplesPerSec=29.967267421084575, CurrSamplesPerSec=29.13147008433662, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:29:02,508] [INFO] [timer.py:197:stop] 0/2248, RunningAvgSamplesPerSec=29.96726840128781, CurrSamplesPerSec=29.96946911922388, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:29:08,305] [INFO] [timer.py:197:stop] 0/2249, RunningAvgSamplesPerSec=29.967373518701052, CurrSamplesPerSec=30.205342871439793, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:29:14,435] [INFO] [logging.py:68:log_dist] [Rank 0] step=2250, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:29:14,436] [INFO] [timer.py:197:stop] 0/2250, RunningAvgSamplesPerSec=29.967077449313603, CurrSamplesPerSec=29.31626399805953, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0004, 'learning_rate': 1e-05, 'epoch': 56.0} +[2022-12-14 20:29:20,673] [INFO] [timer.py:197:stop] 0/2251, RunningAvgSamplesPerSec=29.96723411133297, CurrSamplesPerSec=30.32360022371433, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:29:26,716] [INFO] [timer.py:197:stop] 0/2252, RunningAvgSamplesPerSec=29.967016914987063, CurrSamplesPerSec=29.486380387775974, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:29:32,792] [INFO] [timer.py:197:stop] 0/2253, RunningAvgSamplesPerSec=29.96685469618096, CurrSamplesPerSec=29.606256374182475, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:29:38,840] [INFO] [timer.py:197:stop] 0/2254, RunningAvgSamplesPerSec=29.966745927082158, CurrSamplesPerSec=29.723891774146864, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:29:44,633] [INFO] [timer.py:197:stop] 0/2255, RunningAvgSamplesPerSec=29.966497314296806, CurrSamplesPerSec=29.41689433256031, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:29:51,167] [INFO] [timer.py:197:stop] 0/2256, RunningAvgSamplesPerSec=29.96645517970199, CurrSamplesPerSec=29.871825842215245, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:29:57,484] [INFO] [timer.py:197:stop] 0/2257, RunningAvgSamplesPerSec=29.966605319722166, CurrSamplesPerSec=30.308888070421144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:30:03,799] [INFO] [timer.py:197:stop] 0/2258, RunningAvgSamplesPerSec=29.966759537985208, CurrSamplesPerSec=30.31860669390237, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:30:10,356] [INFO] [timer.py:197:stop] 0/2259, RunningAvgSamplesPerSec=29.966587340731376, CurrSamplesPerSec=29.58308414934134, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:30:17,481] [INFO] [logging.py:68:log_dist] [Rank 0] step=2260, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:30:17,482] [INFO] [timer.py:197:stop] 0/2260, RunningAvgSamplesPerSec=29.966508412904147, CurrSamplesPerSec=29.789421491271877, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:30:23,644] [INFO] [timer.py:197:stop] 0/2261, RunningAvgSamplesPerSec=29.966366900331046, CurrSamplesPerSec=29.650204287666195, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:30:29,513] [INFO] [timer.py:197:stop] 0/2262, RunningAvgSamplesPerSec=29.966166305593173, CurrSamplesPerSec=29.519776022440865, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:30:35,628] [INFO] [timer.py:197:stop] 0/2263, RunningAvgSamplesPerSec=29.96626978416363, CurrSamplesPerSec=30.201971622222338, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:30:41,677] [INFO] [timer.py:197:stop] 0/2264, RunningAvgSamplesPerSec=29.96630622866724, CurrSamplesPerSec=30.04893456231123, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:30:47,606] [INFO] [timer.py:197:stop] 0/2265, RunningAvgSamplesPerSec=29.965940445712697, CurrSamplesPerSec=29.160780801704746, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:30:53,616] [INFO] [timer.py:197:stop] 0/2266, RunningAvgSamplesPerSec=29.965815717748097, CurrSamplesPerSec=29.686191385395638, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:30:59,538] [INFO] [timer.py:197:stop] 0/2267, RunningAvgSamplesPerSec=29.965686688086446, CurrSamplesPerSec=29.676385062368006, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:31:05,646] [INFO] [timer.py:197:stop] 0/2268, RunningAvgSamplesPerSec=29.96587804710128, CurrSamplesPerSec=30.40567020299221, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:31:11,619] [INFO] [timer.py:197:stop] 0/2269, RunningAvgSamplesPerSec=29.965428322877127, CurrSamplesPerSec=28.97988474352303, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:31:19,691] [INFO] [logging.py:68:log_dist] [Rank 0] step=2270, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:31:19,692] [INFO] [timer.py:197:stop] 0/2270, RunningAvgSamplesPerSec=29.96531460355929, CurrSamplesPerSec=29.709712907497238, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:31:26,049] [INFO] [timer.py:197:stop] 0/2271, RunningAvgSamplesPerSec=29.96495734278918, CurrSamplesPerSec=29.17603225112594, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:31:31,903] [INFO] [timer.py:197:stop] 0/2272, RunningAvgSamplesPerSec=29.964989527322984, CurrSamplesPerSec=30.038194719145846, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:31:34,387] [INFO] [timer.py:197:stop] 0/2273, RunningAvgSamplesPerSec=29.96495226172005, CurrSamplesPerSec=29.88059758644749, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:31:36,520] [INFO] [timer.py:197:stop] 0/2274, RunningAvgSamplesPerSec=29.965125452990304, CurrSamplesPerSec=30.363676446950837, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:31:38,688] [INFO] [timer.py:197:stop] 0/2275, RunningAvgSamplesPerSec=29.965088685402762, CurrSamplesPerSec=29.881785060568856, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 56.01} +[2022-12-14 20:31:40,858] [INFO] [timer.py:197:stop] 0/2276, RunningAvgSamplesPerSec=29.96504144026457, CurrSamplesPerSec=29.85803689093868, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:31:42,982] [INFO] [timer.py:197:stop] 0/2277, RunningAvgSamplesPerSec=29.965273930925285, CurrSamplesPerSec=30.50345715108751, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:31:45,120] [INFO] [timer.py:197:stop] 0/2278, RunningAvgSamplesPerSec=29.96543484321554, CurrSamplesPerSec=30.33603982311136, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:31:47,269] [INFO] [timer.py:197:stop] 0/2279, RunningAvgSamplesPerSec=29.965510581872984, CurrSamplesPerSec=30.138889593733378, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:31:49,146] [INFO] [logging.py:68:log_dist] [Rank 0] step=2280, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:31:49,146] [INFO] [timer.py:197:stop] 0/2280, RunningAvgSamplesPerSec=29.967229042869338, CurrSamplesPerSec=34.4681215839785, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:32:32,770] [INFO] [timer.py:197:stop] 0/2281, RunningAvgSamplesPerSec=29.967000325326783, CurrSamplesPerSec=29.454889441893695, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:32:39,260] [INFO] [timer.py:197:stop] 0/2282, RunningAvgSamplesPerSec=29.96693282184375, CurrSamplesPerSec=29.81387845985232, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:32:46,060] [INFO] [timer.py:197:stop] 0/2283, RunningAvgSamplesPerSec=29.96680765915084, CurrSamplesPerSec=29.684129813095407, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:32:52,684] [INFO] [timer.py:197:stop] 0/2284, RunningAvgSamplesPerSec=29.966813302408404, CurrSamplesPerSec=29.979691107013583, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:32:59,032] [INFO] [timer.py:197:stop] 0/2285, RunningAvgSamplesPerSec=29.96712151687989, CurrSamplesPerSec=30.687379207268155, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:33:08,078] [INFO] [timer.py:197:stop] 0/2286, RunningAvgSamplesPerSec=29.967115755678506, CurrSamplesPerSec=29.95396870580797, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:33:14,565] [INFO] [timer.py:197:stop] 0/2287, RunningAvgSamplesPerSec=29.96716152630363, CurrSamplesPerSec=30.072067758520927, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:33:20,717] [INFO] [timer.py:197:stop] 0/2288, RunningAvgSamplesPerSec=29.967201935938604, CurrSamplesPerSec=30.059823465536162, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:33:27,755] [INFO] [timer.py:197:stop] 0/2289, RunningAvgSamplesPerSec=29.96702290089449, CurrSamplesPerSec=29.563265512001387, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:33:34,026] [INFO] [logging.py:68:log_dist] [Rank 0] step=2290, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:33:34,027] [INFO] [timer.py:197:stop] 0/2290, RunningAvgSamplesPerSec=29.966972011157726, CurrSamplesPerSec=29.851037642479845, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:33:40,885] [INFO] [timer.py:197:stop] 0/2291, RunningAvgSamplesPerSec=29.96704987646029, CurrSamplesPerSec=30.146271637958307, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:33:47,093] [INFO] [timer.py:197:stop] 0/2292, RunningAvgSamplesPerSec=29.967127805641642, CurrSamplesPerSec=30.146576339208856, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:33:53,539] [INFO] [timer.py:197:stop] 0/2293, RunningAvgSamplesPerSec=29.96693636135591, CurrSamplesPerSec=29.53485295973525, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:34:00,916] [INFO] [timer.py:197:stop] 0/2294, RunningAvgSamplesPerSec=29.9667632115072, CurrSamplesPerSec=29.575261688315052, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:34:07,377] [INFO] [timer.py:197:stop] 0/2295, RunningAvgSamplesPerSec=29.966657129539602, CurrSamplesPerSec=29.725474988331193, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:34:15,048] [INFO] [timer.py:197:stop] 0/2296, RunningAvgSamplesPerSec=29.966473737086655, CurrSamplesPerSec=29.551776813594312, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:34:21,940] [INFO] [timer.py:197:stop] 0/2297, RunningAvgSamplesPerSec=29.966461796611394, CurrSamplesPerSec=29.939095372087444, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:34:29,850] [INFO] [timer.py:197:stop] 0/2298, RunningAvgSamplesPerSec=29.96662627065207, CurrSamplesPerSec=30.348911664901824, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:34:36,913] [INFO] [timer.py:197:stop] 0/2299, RunningAvgSamplesPerSec=29.96650386488581, CurrSamplesPerSec=29.688072655831288, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:34:43,480] [INFO] [logging.py:68:log_dist] [Rank 0] step=2300, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:34:43,481] [INFO] [timer.py:197:stop] 0/2300, RunningAvgSamplesPerSec=29.966603298177198, CurrSamplesPerSec=30.196756504775383, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 57.0} +[2022-12-14 20:34:50,111] [INFO] [timer.py:197:stop] 0/2301, RunningAvgSamplesPerSec=29.966700769975585, CurrSamplesPerSec=30.19237855611987, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:34:56,644] [INFO] [timer.py:197:stop] 0/2302, RunningAvgSamplesPerSec=29.966337103589215, CurrSamplesPerSec=29.152971087091686, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:35:03,732] [INFO] [timer.py:197:stop] 0/2303, RunningAvgSamplesPerSec=29.966140928008638, CurrSamplesPerSec=29.521633012527772, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:35:10,533] [INFO] [timer.py:197:stop] 0/2304, RunningAvgSamplesPerSec=29.965783577129038, CurrSamplesPerSec=29.165488919718904, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:35:17,427] [INFO] [timer.py:197:stop] 0/2305, RunningAvgSamplesPerSec=29.96588630822642, CurrSamplesPerSec=30.20425528946291, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:35:24,488] [INFO] [timer.py:197:stop] 0/2306, RunningAvgSamplesPerSec=29.965903110774583, CurrSamplesPerSec=30.00464943572107, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:35:31,121] [INFO] [timer.py:197:stop] 0/2307, RunningAvgSamplesPerSec=29.965848417254062, CurrSamplesPerSec=29.840362474719914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:35:38,061] [INFO] [timer.py:197:stop] 0/2308, RunningAvgSamplesPerSec=29.965786178847814, CurrSamplesPerSec=29.823010479670426, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:35:44,418] [INFO] [timer.py:197:stop] 0/2309, RunningAvgSamplesPerSec=29.9660214857005, CurrSamplesPerSec=30.518650298824983, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:35:51,077] [INFO] [logging.py:68:log_dist] [Rank 0] step=2310, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:35:51,078] [INFO] [timer.py:197:stop] 0/2310, RunningAvgSamplesPerSec=29.965950064027282, CurrSamplesPerSec=29.802081695767598, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:35:57,668] [INFO] [timer.py:197:stop] 0/2311, RunningAvgSamplesPerSec=29.965260782206492, CurrSamplesPerSec=28.45463267472669, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:36:04,633] [INFO] [timer.py:197:stop] 0/2312, RunningAvgSamplesPerSec=29.965004386343086, CurrSamplesPerSec=29.384461088574568, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:36:07,147] [INFO] [timer.py:197:stop] 0/2313, RunningAvgSamplesPerSec=29.965318530034118, CurrSamplesPerSec=30.70900823119227, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:36:09,294] [INFO] [timer.py:197:stop] 0/2314, RunningAvgSamplesPerSec=29.96541166936468, CurrSamplesPerSec=30.18221465692673, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:36:11,449] [INFO] [timer.py:197:stop] 0/2315, RunningAvgSamplesPerSec=29.965618549272218, CurrSamplesPerSec=30.45168679381327, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:36:13,600] [INFO] [timer.py:197:stop] 0/2316, RunningAvgSamplesPerSec=29.965680472377354, CurrSamplesPerSec=30.109596794446926, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:36:15,737] [INFO] [timer.py:197:stop] 0/2317, RunningAvgSamplesPerSec=29.96583410253144, CurrSamplesPerSec=30.32560426291854, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:36:17,878] [INFO] [timer.py:197:stop] 0/2318, RunningAvgSamplesPerSec=29.96596550850786, CurrSamplesPerSec=30.27329156682161, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:36:19,998] [INFO] [timer.py:197:stop] 0/2319, RunningAvgSamplesPerSec=29.966226150500862, CurrSamplesPerSec=30.582288472057073, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:36:21,879] [INFO] [logging.py:68:log_dist] [Rank 0] step=2320, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:36:21,879] [INFO] [timer.py:197:stop] 0/2320, RunningAvgSamplesPerSec=29.96792629134809, CurrSamplesPerSec=34.50361790259594, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:36:58,316] [INFO] [timer.py:197:stop] 0/2321, RunningAvgSamplesPerSec=29.967853744643488, CurrSamplesPerSec=29.800629260863275, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:37:04,696] [INFO] [timer.py:197:stop] 0/2322, RunningAvgSamplesPerSec=29.967674534844335, CurrSamplesPerSec=29.55777390650957, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:37:10,754] [INFO] [timer.py:197:stop] 0/2323, RunningAvgSamplesPerSec=29.967732415792195, CurrSamplesPerSec=30.102620903419123, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:37:16,619] [INFO] [timer.py:197:stop] 0/2324, RunningAvgSamplesPerSec=29.96774372624014, CurrSamplesPerSec=29.994018302203006, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:37:22,709] [INFO] [timer.py:197:stop] 0/2325, RunningAvgSamplesPerSec=29.967493803529607, CurrSamplesPerSec=29.39820234681442, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 58.0} +[2022-12-14 20:37:29,096] [INFO] [timer.py:197:stop] 0/2326, RunningAvgSamplesPerSec=29.96765285005464, CurrSamplesPerSec=30.341731856791927, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:37:35,155] [INFO] [timer.py:197:stop] 0/2327, RunningAvgSamplesPerSec=29.967667036368148, CurrSamplesPerSec=30.000672355461457, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:37:41,254] [INFO] [timer.py:197:stop] 0/2328, RunningAvgSamplesPerSec=29.967676177802627, CurrSamplesPerSec=29.988945103906797, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:37:47,417] [INFO] [timer.py:197:stop] 0/2329, RunningAvgSamplesPerSec=29.96768940884898, CurrSamplesPerSec=29.998496473686558, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:37:53,453] [INFO] [logging.py:68:log_dist] [Rank 0] step=2330, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:37:53,454] [INFO] [timer.py:197:stop] 0/2330, RunningAvgSamplesPerSec=29.967792685509103, CurrSamplesPerSec=30.210061165042752, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:37:59,765] [INFO] [timer.py:197:stop] 0/2331, RunningAvgSamplesPerSec=29.967831348558786, CurrSamplesPerSec=30.058110194780838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:38:06,243] [INFO] [timer.py:197:stop] 0/2332, RunningAvgSamplesPerSec=29.967987430500013, CurrSamplesPerSec=30.33596782913568, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:38:12,681] [INFO] [timer.py:197:stop] 0/2333, RunningAvgSamplesPerSec=29.967424356526173, CurrSamplesPerSec=28.710512724061356, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:38:18,785] [INFO] [timer.py:197:stop] 0/2334, RunningAvgSamplesPerSec=29.967610613286475, CurrSamplesPerSec=30.40816045413188, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:38:24,835] [INFO] [timer.py:197:stop] 0/2335, RunningAvgSamplesPerSec=29.967229399561536, CurrSamplesPerSec=29.103862127497543, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:38:31,336] [INFO] [timer.py:197:stop] 0/2336, RunningAvgSamplesPerSec=29.967220129274615, CurrSamplesPerSec=29.945608154124994, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:38:37,298] [INFO] [timer.py:197:stop] 0/2337, RunningAvgSamplesPerSec=29.967198324776383, CurrSamplesPerSec=29.916392942937335, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:38:43,247] [INFO] [timer.py:197:stop] 0/2338, RunningAvgSamplesPerSec=29.96723061300356, CurrSamplesPerSec=30.042813860890373, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:38:49,513] [INFO] [timer.py:197:stop] 0/2339, RunningAvgSamplesPerSec=29.96724859575536, CurrSamplesPerSec=30.009315297776844, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:38:55,340] [INFO] [logging.py:68:log_dist] [Rank 0] step=2340, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:38:55,341] [INFO] [timer.py:197:stop] 0/2340, RunningAvgSamplesPerSec=29.967407819590562, CurrSamplesPerSec=30.344194494612637, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:39:01,447] [INFO] [timer.py:197:stop] 0/2341, RunningAvgSamplesPerSec=29.967370455960346, CurrSamplesPerSec=29.880268303616003, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:39:07,623] [INFO] [timer.py:197:stop] 0/2342, RunningAvgSamplesPerSec=29.967419469021372, CurrSamplesPerSec=30.082501457699156, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:39:13,661] [INFO] [timer.py:197:stop] 0/2343, RunningAvgSamplesPerSec=29.96738164641998, CurrSamplesPerSec=29.879137488544966, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:39:20,338] [INFO] [timer.py:197:stop] 0/2344, RunningAvgSamplesPerSec=29.967276508505325, CurrSamplesPerSec=29.72315453445227, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:39:26,358] [INFO] [timer.py:197:stop] 0/2345, RunningAvgSamplesPerSec=29.967057377146293, CurrSamplesPerSec=29.462496364320845, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:39:32,250] [INFO] [timer.py:197:stop] 0/2346, RunningAvgSamplesPerSec=29.966977883888354, CurrSamplesPerSec=29.78187612797548, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:39:38,531] [INFO] [timer.py:197:stop] 0/2347, RunningAvgSamplesPerSec=29.96674706644091, CurrSamplesPerSec=29.43530992087838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:39:44,986] [INFO] [timer.py:197:stop] 0/2348, RunningAvgSamplesPerSec=29.966374674802207, CurrSamplesPerSec=29.117853807329052, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:39:51,072] [INFO] [timer.py:197:stop] 0/2349, RunningAvgSamplesPerSec=29.96643746490019, CurrSamplesPerSec=30.114471031327028, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:39:57,369] [INFO] [logging.py:68:log_dist] [Rank 0] step=2350, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:39:57,369] [INFO] [timer.py:197:stop] 0/2350, RunningAvgSamplesPerSec=29.966314824960648, CurrSamplesPerSec=29.681218494131436, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 58.01} +[2022-12-14 20:40:03,422] [INFO] [timer.py:197:stop] 0/2351, RunningAvgSamplesPerSec=29.965959652834734, CurrSamplesPerSec=29.154604890527732, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:40:10,179] [INFO] [timer.py:197:stop] 0/2352, RunningAvgSamplesPerSec=29.965898879278782, CurrSamplesPerSec=29.823818951111285, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:40:12,540] [INFO] [timer.py:197:stop] 0/2353, RunningAvgSamplesPerSec=29.966171446438, CurrSamplesPerSec=30.62070092066704, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:40:14,655] [INFO] [timer.py:197:stop] 0/2354, RunningAvgSamplesPerSec=29.966446688280328, CurrSamplesPerSec=30.627828164524104, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:40:16,770] [INFO] [timer.py:197:stop] 0/2355, RunningAvgSamplesPerSec=29.96671569868634, CurrSamplesPerSec=30.613081235291354, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:40:18,910] [INFO] [timer.py:197:stop] 0/2356, RunningAvgSamplesPerSec=29.966843954235383, CurrSamplesPerSec=30.271700668212528, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:40:21,090] [INFO] [timer.py:197:stop] 0/2357, RunningAvgSamplesPerSec=29.966896536268486, CurrSamplesPerSec=30.09118824652638, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:40:23,264] [INFO] [timer.py:197:stop] 0/2358, RunningAvgSamplesPerSec=29.96681672895602, CurrSamplesPerSec=29.780042418751936, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:40:25,380] [INFO] [timer.py:197:stop] 0/2359, RunningAvgSamplesPerSec=29.967087884579243, CurrSamplesPerSec=30.619852161237628, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:40:27,302] [INFO] [logging.py:68:log_dist] [Rank 0] step=2360, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:40:27,302] [INFO] [timer.py:197:stop] 0/2360, RunningAvgSamplesPerSec=29.968480765519605, CurrSamplesPerSec=33.65559229654534, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:41:05,239] [INFO] [timer.py:197:stop] 0/2361, RunningAvgSamplesPerSec=29.96838954975663, CurrSamplesPerSec=29.754836129482506, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:41:11,431] [INFO] [timer.py:197:stop] 0/2362, RunningAvgSamplesPerSec=29.96836384553797, CurrSamplesPerSec=29.907850085645038, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:41:18,019] [INFO] [timer.py:197:stop] 0/2363, RunningAvgSamplesPerSec=29.967989498604307, CurrSamplesPerSec=29.10983977822186, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:41:23,820] [INFO] [timer.py:197:stop] 0/2364, RunningAvgSamplesPerSec=29.968010402642143, CurrSamplesPerSec=30.017446286603253, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:41:29,674] [INFO] [timer.py:197:stop] 0/2365, RunningAvgSamplesPerSec=29.968191982970552, CurrSamplesPerSec=30.403314656188346, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:41:35,998] [INFO] [timer.py:197:stop] 0/2366, RunningAvgSamplesPerSec=29.9678856346186, CurrSamplesPerSec=29.26106557689771, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:41:41,894] [INFO] [timer.py:197:stop] 0/2367, RunningAvgSamplesPerSec=29.96805250646758, CurrSamplesPerSec=30.367801892329364, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:41:47,757] [INFO] [timer.py:197:stop] 0/2368, RunningAvgSamplesPerSec=29.96782853995353, CurrSamplesPerSec=29.447351053821418, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:41:53,904] [INFO] [timer.py:197:stop] 0/2369, RunningAvgSamplesPerSec=29.967524976824784, CurrSamplesPerSec=29.266112442909183, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:41:59,721] [INFO] [logging.py:68:log_dist] [Rank 0] step=2370, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:41:59,722] [INFO] [timer.py:197:stop] 0/2370, RunningAvgSamplesPerSec=29.967401744700705, CurrSamplesPerSec=29.67852430979181, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:42:06,060] [INFO] [timer.py:197:stop] 0/2371, RunningAvgSamplesPerSec=29.967278884527882, CurrSamplesPerSec=29.679144486721444, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:42:12,259] [INFO] [timer.py:197:stop] 0/2372, RunningAvgSamplesPerSec=29.966900093358102, CurrSamplesPerSec=29.095644499920713, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:42:18,132] [INFO] [timer.py:197:stop] 0/2373, RunningAvgSamplesPerSec=29.96661723667373, CurrSamplesPerSec=29.310921374775802, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:42:24,379] [INFO] [timer.py:197:stop] 0/2374, RunningAvgSamplesPerSec=29.966155615022448, CurrSamplesPerSec=28.91023416350606, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:42:30,359] [INFO] [timer.py:197:stop] 0/2375, RunningAvgSamplesPerSec=29.966198506978664, CurrSamplesPerSec=30.068284972054975, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 59.0} +[2022-12-14 20:42:36,374] [INFO] [timer.py:197:stop] 0/2376, RunningAvgSamplesPerSec=29.966270462418397, CurrSamplesPerSec=30.137999660262516, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:42:42,426] [INFO] [timer.py:197:stop] 0/2377, RunningAvgSamplesPerSec=29.966130758082972, CurrSamplesPerSec=29.63810471026838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:42:48,448] [INFO] [timer.py:197:stop] 0/2378, RunningAvgSamplesPerSec=29.966124540534846, CurrSamplesPerSec=29.95136513992157, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:42:54,570] [INFO] [timer.py:197:stop] 0/2379, RunningAvgSamplesPerSec=29.966190060744935, CurrSamplesPerSec=30.122679392827642, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:43:00,517] [INFO] [logging.py:68:log_dist] [Rank 0] step=2380, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:43:00,518] [INFO] [timer.py:197:stop] 0/2380, RunningAvgSamplesPerSec=29.966109283770816, CurrSamplesPerSec=29.775325372258873, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:43:07,412] [INFO] [timer.py:197:stop] 0/2381, RunningAvgSamplesPerSec=29.965828269695145, CurrSamplesPerSec=29.312160024128016, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:43:13,291] [INFO] [timer.py:197:stop] 0/2382, RunningAvgSamplesPerSec=29.965858842888025, CurrSamplesPerSec=30.038769513109454, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:43:19,857] [INFO] [timer.py:197:stop] 0/2383, RunningAvgSamplesPerSec=29.966002697190426, CurrSamplesPerSec=30.31233457596737, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:43:25,955] [INFO] [timer.py:197:stop] 0/2384, RunningAvgSamplesPerSec=29.9659000466088, CurrSamplesPerSec=29.723467198606592, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:43:32,324] [INFO] [timer.py:197:stop] 0/2385, RunningAvgSamplesPerSec=29.965832030453154, CurrSamplesPerSec=29.804689155057623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:43:38,237] [INFO] [timer.py:197:stop] 0/2386, RunningAvgSamplesPerSec=29.96597473951572, CurrSamplesPerSec=30.30995582150923, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:43:44,105] [INFO] [timer.py:197:stop] 0/2387, RunningAvgSamplesPerSec=29.965722360143452, CurrSamplesPerSec=29.375897808140536, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:43:50,239] [INFO] [timer.py:197:stop] 0/2388, RunningAvgSamplesPerSec=29.965396999643982, CurrSamplesPerSec=29.209007895401168, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:43:55,889] [INFO] [timer.py:197:stop] 0/2389, RunningAvgSamplesPerSec=29.9652851372778, CurrSamplesPerSec=29.700738857663957, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:44:02,035] [INFO] [logging.py:68:log_dist] [Rank 0] step=2390, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:44:02,035] [INFO] [timer.py:197:stop] 0/2390, RunningAvgSamplesPerSec=29.965523121579515, CurrSamplesPerSec=30.544573541310914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:44:07,952] [INFO] [timer.py:197:stop] 0/2391, RunningAvgSamplesPerSec=29.965629443454038, CurrSamplesPerSec=30.22169662730334, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:44:14,192] [INFO] [timer.py:197:stop] 0/2392, RunningAvgSamplesPerSec=29.96567795942969, CurrSamplesPerSec=30.082032864426395, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:44:16,634] [INFO] [timer.py:197:stop] 0/2393, RunningAvgSamplesPerSec=29.965644075114145, CurrSamplesPerSec=29.884878923778796, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:44:18,769] [INFO] [timer.py:197:stop] 0/2394, RunningAvgSamplesPerSec=29.96580047457715, CurrSamplesPerSec=30.344479198577478, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:44:20,888] [INFO] [timer.py:197:stop] 0/2395, RunningAvgSamplesPerSec=29.966053719856536, CurrSamplesPerSec=30.58431988158003, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:44:23,037] [INFO] [timer.py:197:stop] 0/2396, RunningAvgSamplesPerSec=29.966126798166087, CurrSamplesPerSec=30.142030159158992, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:44:25,164] [INFO] [timer.py:197:stop] 0/2397, RunningAvgSamplesPerSec=29.966331635840916, CurrSamplesPerSec=30.464874802952114, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:44:27,305] [INFO] [timer.py:197:stop] 0/2398, RunningAvgSamplesPerSec=29.966451994872514, CurrSamplesPerSec=30.257512881415234, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:44:29,478] [INFO] [timer.py:197:stop] 0/2399, RunningAvgSamplesPerSec=29.96638994332518, CurrSamplesPerSec=29.81844873903728, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:44:31,350] [INFO] [logging.py:68:log_dist] [Rank 0] step=2400, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:44:31,351] [INFO] [timer.py:197:stop] 0/2400, RunningAvgSamplesPerSec=29.96805769272142, CurrSamplesPerSec=34.581293244256614, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 59.01} +[2022-12-14 20:45:08,169] [INFO] [timer.py:197:stop] 0/2401, RunningAvgSamplesPerSec=29.967532282344138, CurrSamplesPerSec=28.758453081006596, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:45:14,221] [INFO] [timer.py:197:stop] 0/2402, RunningAvgSamplesPerSec=29.96745891539061, CurrSamplesPerSec=29.79247972431168, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:45:20,265] [INFO] [timer.py:197:stop] 0/2403, RunningAvgSamplesPerSec=29.96702948473792, CurrSamplesPerSec=28.970676999759544, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:45:26,198] [INFO] [timer.py:197:stop] 0/2404, RunningAvgSamplesPerSec=29.967090450163436, CurrSamplesPerSec=30.11418724870764, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:45:32,090] [INFO] [timer.py:197:stop] 0/2405, RunningAvgSamplesPerSec=29.967151636571874, CurrSamplesPerSec=30.114846037991974, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:45:37,893] [INFO] [timer.py:197:stop] 0/2406, RunningAvgSamplesPerSec=29.967068778684514, CurrSamplesPerSec=29.769275999040943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:45:43,767] [INFO] [timer.py:197:stop] 0/2407, RunningAvgSamplesPerSec=29.9669406404242, CurrSamplesPerSec=29.662031868107583, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:45:49,542] [INFO] [timer.py:197:stop] 0/2408, RunningAvgSamplesPerSec=29.96705849899756, CurrSamplesPerSec=30.25321617733707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:45:55,665] [INFO] [timer.py:197:stop] 0/2409, RunningAvgSamplesPerSec=29.967325093267647, CurrSamplesPerSec=30.622786344269418, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:46:01,770] [INFO] [logging.py:68:log_dist] [Rank 0] step=2410, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:46:01,771] [INFO] [timer.py:197:stop] 0/2410, RunningAvgSamplesPerSec=29.96735488823334, CurrSamplesPerSec=30.03924348257456, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:46:10,999] [INFO] [timer.py:197:stop] 0/2411, RunningAvgSamplesPerSec=29.967065068371156, CurrSamplesPerSec=29.28506808260833, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:46:17,009] [INFO] [timer.py:197:stop] 0/2412, RunningAvgSamplesPerSec=29.966927394610014, CurrSamplesPerSec=29.638903187439535, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:46:23,072] [INFO] [timer.py:197:stop] 0/2413, RunningAvgSamplesPerSec=29.966573411853144, CurrSamplesPerSec=29.13709853453457, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:46:29,228] [INFO] [timer.py:197:stop] 0/2414, RunningAvgSamplesPerSec=29.966411548100865, CurrSamplesPerSec=29.581177043101707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:46:35,300] [INFO] [timer.py:197:stop] 0/2415, RunningAvgSamplesPerSec=29.966597296197246, CurrSamplesPerSec=30.421424552309382, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:46:41,612] [INFO] [timer.py:197:stop] 0/2416, RunningAvgSamplesPerSec=29.96671863291689, CurrSamplesPerSec=30.262394190027017, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:46:47,730] [INFO] [timer.py:197:stop] 0/2417, RunningAvgSamplesPerSec=29.966697598394887, CurrSamplesPerSec=29.916006192374855, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:46:53,577] [INFO] [timer.py:197:stop] 0/2418, RunningAvgSamplesPerSec=29.966396938405257, CurrSamplesPerSec=29.25748724895182, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:46:59,910] [INFO] [timer.py:197:stop] 0/2419, RunningAvgSamplesPerSec=29.96626223234216, CurrSamplesPerSec=29.644310402519654, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:47:06,019] [INFO] [logging.py:68:log_dist] [Rank 0] step=2420, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:47:06,020] [INFO] [timer.py:197:stop] 0/2420, RunningAvgSamplesPerSec=29.96647393556097, CurrSamplesPerSec=30.48705334529637, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:47:12,105] [INFO] [timer.py:197:stop] 0/2421, RunningAvgSamplesPerSec=29.966372673680763, CurrSamplesPerSec=29.723506693494013, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:47:17,944] [INFO] [timer.py:197:stop] 0/2422, RunningAvgSamplesPerSec=29.966520880742916, CurrSamplesPerSec=30.32937668595898, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:47:24,617] [INFO] [timer.py:197:stop] 0/2423, RunningAvgSamplesPerSec=29.96643196258251, CurrSamplesPerSec=29.752784798734144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:47:30,864] [INFO] [timer.py:197:stop] 0/2424, RunningAvgSamplesPerSec=29.966333811918073, CurrSamplesPerSec=29.730581262417026, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:47:36,851] [INFO] [timer.py:197:stop] 0/2425, RunningAvgSamplesPerSec=29.96650428229378, CurrSamplesPerSec=30.38515408128291, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 60.01} +[2022-12-14 20:47:42,851] [INFO] [timer.py:197:stop] 0/2426, RunningAvgSamplesPerSec=29.966422999679875, CurrSamplesPerSec=29.770761698119987, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:47:49,219] [INFO] [timer.py:197:stop] 0/2427, RunningAvgSamplesPerSec=29.96647800342891, CurrSamplesPerSec=30.10040320690666, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:47:55,182] [INFO] [timer.py:197:stop] 0/2428, RunningAvgSamplesPerSec=29.966673137852574, CurrSamplesPerSec=30.447469456454016, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:48:02,505] [INFO] [timer.py:197:stop] 0/2429, RunningAvgSamplesPerSec=29.96673589776012, CurrSamplesPerSec=30.11976928880505, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:48:08,715] [INFO] [logging.py:68:log_dist] [Rank 0] step=2430, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:48:08,715] [INFO] [timer.py:197:stop] 0/2430, RunningAvgSamplesPerSec=29.966641917354142, CurrSamplesPerSec=29.740275167568857, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:48:14,745] [INFO] [timer.py:197:stop] 0/2431, RunningAvgSamplesPerSec=29.96661778686082, CurrSamplesPerSec=29.90814332174973, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:48:20,967] [INFO] [timer.py:197:stop] 0/2432, RunningAvgSamplesPerSec=29.96708656937068, CurrSamplesPerSec=31.15075414555507, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:48:23,585] [INFO] [timer.py:197:stop] 0/2433, RunningAvgSamplesPerSec=29.967250602804942, CurrSamplesPerSec=30.371227447618534, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:48:25,792] [INFO] [timer.py:197:stop] 0/2434, RunningAvgSamplesPerSec=29.967149318275435, CurrSamplesPerSec=29.722934027898884, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:48:27,999] [INFO] [timer.py:197:stop] 0/2435, RunningAvgSamplesPerSec=29.96706560782957, CurrSamplesPerSec=29.764856095839047, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:48:30,150] [INFO] [timer.py:197:stop] 0/2436, RunningAvgSamplesPerSec=29.96713223346136, CurrSamplesPerSec=30.130114371558054, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:48:32,276] [INFO] [timer.py:197:stop] 0/2437, RunningAvgSamplesPerSec=29.967335217919356, CurrSamplesPerSec=30.469684908567263, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:48:34,454] [INFO] [timer.py:197:stop] 0/2438, RunningAvgSamplesPerSec=29.967244622591128, CurrSamplesPerSec=29.74825770158127, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:48:36,589] [INFO] [timer.py:197:stop] 0/2439, RunningAvgSamplesPerSec=29.96739289071156, CurrSamplesPerSec=30.332982094265486, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:48:38,539] [INFO] [logging.py:68:log_dist] [Rank 0] step=2440, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:48:38,540] [INFO] [timer.py:197:stop] 0/2440, RunningAvgSamplesPerSec=29.968731297109812, CurrSamplesPerSec=33.62895797449122, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:49:25,667] [INFO] [timer.py:197:stop] 0/2441, RunningAvgSamplesPerSec=29.968581921129424, CurrSamplesPerSec=29.608777423796422, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:49:32,700] [INFO] [timer.py:197:stop] 0/2442, RunningAvgSamplesPerSec=29.968419882713178, CurrSamplesPerSec=29.57835432524091, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:49:39,553] [INFO] [timer.py:197:stop] 0/2443, RunningAvgSamplesPerSec=29.96835035303425, CurrSamplesPerSec=29.79965333060391, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:49:46,612] [INFO] [timer.py:197:stop] 0/2444, RunningAvgSamplesPerSec=29.96827619366369, CurrSamplesPerSec=29.78834051477086, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:49:53,130] [INFO] [timer.py:197:stop] 0/2445, RunningAvgSamplesPerSec=29.968378563075355, CurrSamplesPerSec=30.22046837565385, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:50:00,197] [INFO] [timer.py:197:stop] 0/2446, RunningAvgSamplesPerSec=29.96830048279454, CurrSamplesPerSec=29.77875730489919, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:50:07,017] [INFO] [timer.py:197:stop] 0/2447, RunningAvgSamplesPerSec=29.96827251360072, CurrSamplesPerSec=29.900071432127838, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:50:13,286] [INFO] [timer.py:197:stop] 0/2448, RunningAvgSamplesPerSec=29.968301534053484, CurrSamplesPerSec=30.0394250067563, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:50:20,389] [INFO] [timer.py:197:stop] 0/2449, RunningAvgSamplesPerSec=29.967998369535994, CurrSamplesPerSec=29.24437101150043, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:50:26,848] [INFO] [logging.py:68:log_dist] [Rank 0] step=2450, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:50:26,849] [INFO] [timer.py:197:stop] 0/2450, RunningAvgSamplesPerSec=29.967811174389595, CurrSamplesPerSec=29.516643692761328, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 61.0} +[2022-12-14 20:50:33,825] [INFO] [timer.py:197:stop] 0/2451, RunningAvgSamplesPerSec=29.96782343311191, CurrSamplesPerSec=29.997862878703728, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:50:40,504] [INFO] [timer.py:197:stop] 0/2452, RunningAvgSamplesPerSec=29.96760400045323, CurrSamplesPerSec=29.439684146455146, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:50:46,936] [INFO] [timer.py:197:stop] 0/2453, RunningAvgSamplesPerSec=29.968003873670867, CurrSamplesPerSec=30.980816820684698, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:50:53,831] [INFO] [timer.py:197:stop] 0/2454, RunningAvgSamplesPerSec=29.968269481077602, CurrSamplesPerSec=30.633735110250903, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:51:00,534] [INFO] [timer.py:197:stop] 0/2455, RunningAvgSamplesPerSec=29.968236377025963, CurrSamplesPerSec=29.88728459558515, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:51:07,655] [INFO] [timer.py:197:stop] 0/2456, RunningAvgSamplesPerSec=29.96831968274806, CurrSamplesPerSec=30.174072182719218, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:51:14,109] [INFO] [timer.py:197:stop] 0/2457, RunningAvgSamplesPerSec=29.968656607868695, CurrSamplesPerSec=30.818939112047293, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:51:20,554] [INFO] [timer.py:197:stop] 0/2458, RunningAvgSamplesPerSec=29.968054744100346, CurrSamplesPerSec=28.55993485279639, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:51:27,531] [INFO] [timer.py:197:stop] 0/2459, RunningAvgSamplesPerSec=29.96794671190239, CurrSamplesPerSec=29.704949082770582, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:51:34,118] [INFO] [logging.py:68:log_dist] [Rank 0] step=2460, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:51:34,119] [INFO] [timer.py:197:stop] 0/2460, RunningAvgSamplesPerSec=29.967940638678115, CurrSamplesPerSec=29.953026156004526, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:51:41,168] [INFO] [timer.py:197:stop] 0/2461, RunningAvgSamplesPerSec=29.96782925450005, CurrSamplesPerSec=29.69652654058594, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:51:47,707] [INFO] [timer.py:197:stop] 0/2462, RunningAvgSamplesPerSec=29.967991193596397, CurrSamplesPerSec=30.371564203896618, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:51:55,016] [INFO] [timer.py:197:stop] 0/2463, RunningAvgSamplesPerSec=29.968244409554277, CurrSamplesPerSec=30.604383647104644, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:52:01,915] [INFO] [timer.py:197:stop] 0/2464, RunningAvgSamplesPerSec=29.96822533850769, CurrSamplesPerSec=29.921364911734717, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:52:08,705] [INFO] [timer.py:197:stop] 0/2465, RunningAvgSamplesPerSec=29.967880725266244, CurrSamplesPerSec=29.142811421300646, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:52:15,054] [INFO] [timer.py:197:stop] 0/2466, RunningAvgSamplesPerSec=29.96819335444964, CurrSamplesPerSec=30.758513849586443, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:52:22,040] [INFO] [timer.py:197:stop] 0/2467, RunningAvgSamplesPerSec=29.968257945836182, CurrSamplesPerSec=30.128261200418958, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:52:29,032] [INFO] [timer.py:197:stop] 0/2468, RunningAvgSamplesPerSec=29.968120086135393, CurrSamplesPerSec=29.632107689432505, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:52:36,178] [INFO] [timer.py:197:stop] 0/2469, RunningAvgSamplesPerSec=29.968193076315014, CurrSamplesPerSec=30.149274906874183, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:52:43,202] [INFO] [logging.py:68:log_dist] [Rank 0] step=2470, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:52:43,203] [INFO] [timer.py:197:stop] 0/2470, RunningAvgSamplesPerSec=29.967974768976834, CurrSamplesPerSec=29.43892219522183, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:52:49,704] [INFO] [timer.py:197:stop] 0/2471, RunningAvgSamplesPerSec=29.967884285608562, CurrSamplesPerSec=29.746223759129077, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:52:56,478] [INFO] [timer.py:197:stop] 0/2472, RunningAvgSamplesPerSec=29.967842338083287, CurrSamplesPerSec=29.8646307404638, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:52:58,935] [INFO] [timer.py:197:stop] 0/2473, RunningAvgSamplesPerSec=29.967809303721133, CurrSamplesPerSec=29.886436077916695, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:53:01,064] [INFO] [timer.py:197:stop] 0/2474, RunningAvgSamplesPerSec=29.96798712221944, CurrSamplesPerSec=30.413917491958202, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:53:03,243] [INFO] [timer.py:197:stop] 0/2475, RunningAvgSamplesPerSec=29.96788682454071, CurrSamplesPerSec=29.721986216090382, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 61.01} +[2022-12-14 20:53:05,390] [INFO] [timer.py:197:stop] 0/2476, RunningAvgSamplesPerSec=29.96798286500829, CurrSamplesPerSec=30.2073890973156, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:53:07,538] [INFO] [timer.py:197:stop] 0/2477, RunningAvgSamplesPerSec=29.968060766145747, CurrSamplesPerSec=30.162036158425956, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:53:09,656] [INFO] [timer.py:197:stop] 0/2478, RunningAvgSamplesPerSec=29.968308536309195, CurrSamplesPerSec=30.594355448740195, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:53:11,774] [INFO] [timer.py:197:stop] 0/2479, RunningAvgSamplesPerSec=29.968549759417826, CurrSamplesPerSec=30.5779687040971, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:53:13,703] [INFO] [logging.py:68:log_dist] [Rank 0] step=2480, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:53:13,704] [INFO] [timer.py:197:stop] 0/2480, RunningAvgSamplesPerSec=29.969879498106376, CurrSamplesPerSec=33.670515310346836, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:53:57,781] [INFO] [timer.py:197:stop] 0/2481, RunningAvgSamplesPerSec=29.969702807797265, CurrSamplesPerSec=29.538171172880233, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:54:04,603] [INFO] [timer.py:197:stop] 0/2482, RunningAvgSamplesPerSec=29.969567287822397, CurrSamplesPerSec=29.637338997983957, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:54:12,949] [INFO] [timer.py:197:stop] 0/2483, RunningAvgSamplesPerSec=29.96930026548459, CurrSamplesPerSec=29.32140679159679, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:54:20,908] [INFO] [timer.py:197:stop] 0/2484, RunningAvgSamplesPerSec=29.969306887819844, CurrSamplesPerSec=29.985745917550982, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:54:27,275] [INFO] [timer.py:197:stop] 0/2485, RunningAvgSamplesPerSec=29.969388199695906, CurrSamplesPerSec=30.17257308941738, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:54:33,779] [INFO] [timer.py:197:stop] 0/2486, RunningAvgSamplesPerSec=29.969407131896975, CurrSamplesPerSec=30.016489668434282, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:54:40,401] [INFO] [timer.py:197:stop] 0/2487, RunningAvgSamplesPerSec=29.969663030188542, CurrSamplesPerSec=30.61909425534366, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:54:46,993] [INFO] [timer.py:197:stop] 0/2488, RunningAvgSamplesPerSec=29.9690782616668, CurrSamplesPerSec=28.583156522370608, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:54:54,092] [INFO] [timer.py:197:stop] 0/2489, RunningAvgSamplesPerSec=29.968492055713998, CurrSamplesPerSec=28.57878969481054, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:55:00,606] [INFO] [logging.py:68:log_dist] [Rank 0] step=2490, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:55:00,607] [INFO] [timer.py:197:stop] 0/2490, RunningAvgSamplesPerSec=29.968676730540473, CurrSamplesPerSec=30.435114308215283, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:55:07,632] [INFO] [timer.py:197:stop] 0/2491, RunningAvgSamplesPerSec=29.968829687196347, CurrSamplesPerSec=30.354282447285907, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:55:14,373] [INFO] [timer.py:197:stop] 0/2492, RunningAvgSamplesPerSec=29.96875400397826, CurrSamplesPerSec=29.78155562557414, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:55:20,770] [INFO] [timer.py:197:stop] 0/2493, RunningAvgSamplesPerSec=29.96860900212443, CurrSamplesPerSec=29.611854208459114, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:55:27,573] [INFO] [timer.py:197:stop] 0/2494, RunningAvgSamplesPerSec=29.968875609190672, CurrSamplesPerSec=30.648050549359965, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:55:33,840] [INFO] [timer.py:197:stop] 0/2495, RunningAvgSamplesPerSec=29.968707413003727, CurrSamplesPerSec=29.555346141086996, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:55:40,389] [INFO] [timer.py:197:stop] 0/2496, RunningAvgSamplesPerSec=29.96831811292346, CurrSamplesPerSec=29.028249385475053, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:55:46,790] [INFO] [timer.py:197:stop] 0/2497, RunningAvgSamplesPerSec=29.968367851740016, CurrSamplesPerSec=30.092932278769933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:55:53,467] [INFO] [timer.py:197:stop] 0/2498, RunningAvgSamplesPerSec=29.968423923257905, CurrSamplesPerSec=30.1089787607193, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:55:59,903] [INFO] [timer.py:197:stop] 0/2499, RunningAvgSamplesPerSec=29.968658616858523, CurrSamplesPerSec=30.5661373980471, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:56:06,350] [INFO] [logging.py:68:log_dist] [Rank 0] step=2500, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:56:06,350] [INFO] [timer.py:197:stop] 0/2500, RunningAvgSamplesPerSec=29.968762717812083, CurrSamplesPerSec=30.230978087350355, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 62.0} +[2022-12-14 20:56:12,934] [INFO] [timer.py:197:stop] 0/2501, RunningAvgSamplesPerSec=29.968756731990776, CurrSamplesPerSec=29.95381161005467, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:56:19,516] [INFO] [timer.py:197:stop] 0/2502, RunningAvgSamplesPerSec=29.96888680304407, CurrSamplesPerSec=30.297499985045125, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:56:26,465] [INFO] [timer.py:197:stop] 0/2503, RunningAvgSamplesPerSec=29.96857438577754, CurrSamplesPerSec=29.207377522842997, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:56:33,357] [INFO] [timer.py:197:stop] 0/2504, RunningAvgSamplesPerSec=29.968647365650213, CurrSamplesPerSec=30.15228893489763, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:56:40,511] [INFO] [timer.py:197:stop] 0/2505, RunningAvgSamplesPerSec=29.96866388940032, CurrSamplesPerSec=30.01006344657841, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:56:47,121] [INFO] [timer.py:197:stop] 0/2506, RunningAvgSamplesPerSec=29.968774745747716, CurrSamplesPerSec=30.248842295097077, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:56:54,323] [INFO] [timer.py:197:stop] 0/2507, RunningAvgSamplesPerSec=29.968747985540574, CurrSamplesPerSec=29.90188997535648, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:00,637] [INFO] [timer.py:197:stop] 0/2508, RunningAvgSamplesPerSec=29.968878975558415, CurrSamplesPerSec=30.300642911872895, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:07,180] [INFO] [timer.py:197:stop] 0/2509, RunningAvgSamplesPerSec=29.96867178024744, CurrSamplesPerSec=29.45828669354874, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:13,772] [INFO] [logging.py:68:log_dist] [Rank 0] step=2510, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:57:13,772] [INFO] [timer.py:197:stop] 0/2510, RunningAvgSamplesPerSec=29.96868926587792, CurrSamplesPerSec=30.012589982552736, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:20,179] [INFO] [timer.py:197:stop] 0/2511, RunningAvgSamplesPerSec=29.96842841182263, CurrSamplesPerSec=29.328188686015032, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:27,183] [INFO] [timer.py:197:stop] 0/2512, RunningAvgSamplesPerSec=29.968132310305666, CurrSamplesPerSec=29.24319224194739, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:29,566] [INFO] [timer.py:197:stop] 0/2513, RunningAvgSamplesPerSec=29.968254446790656, CurrSamplesPerSec=30.277986716667623, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:31,713] [INFO] [timer.py:197:stop] 0/2514, RunningAvgSamplesPerSec=29.968339774978315, CurrSamplesPerSec=30.18414235228225, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:33,838] [INFO] [timer.py:197:stop] 0/2515, RunningAvgSamplesPerSec=29.968548396063902, CurrSamplesPerSec=30.50193554896914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:36,021] [INFO] [timer.py:197:stop] 0/2516, RunningAvgSamplesPerSec=29.968430554770375, CurrSamplesPerSec=29.675194173286876, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:38,144] [INFO] [timer.py:197:stop] 0/2517, RunningAvgSamplesPerSec=29.968646624463165, CurrSamplesPerSec=30.52187745242843, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:40,257] [INFO] [timer.py:197:stop] 0/2518, RunningAvgSamplesPerSec=29.968923549344282, CurrSamplesPerSec=30.681967066961636, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:42,389] [INFO] [timer.py:197:stop] 0/2519, RunningAvgSamplesPerSec=29.969089169027825, CurrSamplesPerSec=30.391666265687032, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:57:44,280] [INFO] [logging.py:68:log_dist] [Rank 0] step=2520, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:57:44,280] [INFO] [timer.py:197:stop] 0/2520, RunningAvgSamplesPerSec=29.970614818117323, CurrSamplesPerSec=34.375252882132784, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:58:24,681] [INFO] [timer.py:197:stop] 0/2521, RunningAvgSamplesPerSec=29.970456281243624, CurrSamplesPerSec=29.57650974529659, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:58:30,994] [INFO] [timer.py:197:stop] 0/2522, RunningAvgSamplesPerSec=29.97046881604357, CurrSamplesPerSec=30.002077291192613, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:58:37,687] [INFO] [timer.py:197:stop] 0/2523, RunningAvgSamplesPerSec=29.970379845291518, CurrSamplesPerSec=29.747839022945808, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:58:43,653] [INFO] [timer.py:197:stop] 0/2524, RunningAvgSamplesPerSec=29.97005917591604, CurrSamplesPerSec=29.182892995117403, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:58:50,036] [INFO] [timer.py:197:stop] 0/2525, RunningAvgSamplesPerSec=29.969737014355918, CurrSamplesPerSec=29.178699406263636, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 63.0} +[2022-12-14 20:58:56,143] [INFO] [timer.py:197:stop] 0/2526, RunningAvgSamplesPerSec=29.969976123962628, CurrSamplesPerSec=30.585647586622347, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:59:02,700] [INFO] [timer.py:197:stop] 0/2527, RunningAvgSamplesPerSec=29.970077489822952, CurrSamplesPerSec=30.228128716082303, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:59:09,170] [INFO] [timer.py:197:stop] 0/2528, RunningAvgSamplesPerSec=29.969826553653093, CurrSamplesPerSec=29.349336157378257, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:59:15,806] [INFO] [timer.py:197:stop] 0/2529, RunningAvgSamplesPerSec=29.96975449081604, CurrSamplesPerSec=29.788823142902768, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:59:22,037] [INFO] [logging.py:68:log_dist] [Rank 0] step=2530, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 20:59:22,038] [INFO] [timer.py:197:stop] 0/2530, RunningAvgSamplesPerSec=29.969705000624856, CurrSamplesPerSec=29.845163198347393, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:59:28,954] [INFO] [timer.py:197:stop] 0/2531, RunningAvgSamplesPerSec=29.969563841790645, CurrSamplesPerSec=29.616914982656976, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:59:35,583] [INFO] [timer.py:197:stop] 0/2532, RunningAvgSamplesPerSec=29.969566118751338, CurrSamplesPerSec=29.975325659442642, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:59:41,931] [INFO] [timer.py:197:stop] 0/2533, RunningAvgSamplesPerSec=29.969722531085914, CurrSamplesPerSec=30.370742944034248, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:59:48,292] [INFO] [timer.py:197:stop] 0/2534, RunningAvgSamplesPerSec=29.969862748135508, CurrSamplesPerSec=30.329006597495386, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 20:59:54,538] [INFO] [timer.py:197:stop] 0/2535, RunningAvgSamplesPerSec=29.96962799874457, CurrSamplesPerSec=29.386806169144272, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:00:01,396] [INFO] [timer.py:197:stop] 0/2536, RunningAvgSamplesPerSec=29.969622209330367, CurrSamplesPerSec=29.954964798080102, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:00:07,758] [INFO] [timer.py:197:stop] 0/2537, RunningAvgSamplesPerSec=29.969629188027223, CurrSamplesPerSec=29.987323650852005, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:00:14,486] [INFO] [timer.py:197:stop] 0/2538, RunningAvgSamplesPerSec=29.969837055819614, CurrSamplesPerSec=30.506216525298576, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:00:21,250] [INFO] [timer.py:197:stop] 0/2539, RunningAvgSamplesPerSec=29.9697398909537, CurrSamplesPerSec=29.725340030027045, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:00:27,433] [INFO] [logging.py:68:log_dist] [Rank 0] step=2540, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:00:27,434] [INFO] [timer.py:197:stop] 0/2540, RunningAvgSamplesPerSec=29.96972602717177, CurrSamplesPerSec=29.934594858553744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:00:33,912] [INFO] [timer.py:197:stop] 0/2541, RunningAvgSamplesPerSec=29.969574440947017, CurrSamplesPerSec=29.589726710372254, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:00:40,246] [INFO] [timer.py:197:stop] 0/2542, RunningAvgSamplesPerSec=29.969430148288158, CurrSamplesPerSec=29.607497251686407, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:00:46,772] [INFO] [timer.py:197:stop] 0/2543, RunningAvgSamplesPerSec=29.969395976167387, CurrSamplesPerSec=29.882849543361534, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:00:53,373] [INFO] [timer.py:197:stop] 0/2544, RunningAvgSamplesPerSec=29.96928588863634, CurrSamplesPerSec=29.692141349108354, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:00,161] [INFO] [timer.py:197:stop] 0/2545, RunningAvgSamplesPerSec=29.969505049642393, CurrSamplesPerSec=30.537168859387176, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:06,481] [INFO] [timer.py:197:stop] 0/2546, RunningAvgSamplesPerSec=29.969282665741968, CurrSamplesPerSec=29.414238248843585, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:13,226] [INFO] [timer.py:197:stop] 0/2547, RunningAvgSamplesPerSec=29.96922531717784, CurrSamplesPerSec=29.824037643541164, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:19,555] [INFO] [timer.py:197:stop] 0/2548, RunningAvgSamplesPerSec=29.969112805760552, CurrSamplesPerSec=29.685482277265816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:25,658] [INFO] [timer.py:197:stop] 0/2549, RunningAvgSamplesPerSec=29.968951086724463, CurrSamplesPerSec=29.56279667645282, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:32,128] [INFO] [logging.py:68:log_dist] [Rank 0] step=2550, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:01:32,129] [INFO] [timer.py:197:stop] 0/2550, RunningAvgSamplesPerSec=29.968838626233634, CurrSamplesPerSec=29.685114604905603, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 63.01} +[2022-12-14 21:01:38,284] [INFO] [timer.py:197:stop] 0/2551, RunningAvgSamplesPerSec=29.96897337428437, CurrSamplesPerSec=30.316292007618728, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:44,673] [INFO] [timer.py:197:stop] 0/2552, RunningAvgSamplesPerSec=29.969058904653725, CurrSamplesPerSec=30.188674084782082, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:47,292] [INFO] [timer.py:197:stop] 0/2553, RunningAvgSamplesPerSec=29.969024953837298, CurrSamplesPerSec=29.882699845886194, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:49,423] [INFO] [timer.py:197:stop] 0/2554, RunningAvgSamplesPerSec=29.969191676191535, CurrSamplesPerSec=30.400625508780816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:51,614] [INFO] [timer.py:197:stop] 0/2555, RunningAvgSamplesPerSec=29.96907264296363, CurrSamplesPerSec=29.668349235229098, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:53,754] [INFO] [timer.py:197:stop] 0/2556, RunningAvgSamplesPerSec=29.969340661334627, CurrSamplesPerSec=30.669585605113603, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:55,889] [INFO] [timer.py:197:stop] 0/2557, RunningAvgSamplesPerSec=29.96948380863039, CurrSamplesPerSec=30.339598808412543, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:01:57,983] [INFO] [timer.py:197:stop] 0/2558, RunningAvgSamplesPerSec=29.969851070537, CurrSamplesPerSec=30.93854691941642, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:02:00,137] [INFO] [timer.py:197:stop] 0/2559, RunningAvgSamplesPerSec=29.969889162999603, CurrSamplesPerSec=30.067570964208187, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:02:02,043] [INFO] [logging.py:68:log_dist] [Rank 0] step=2560, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:02:02,044] [INFO] [timer.py:197:stop] 0/2560, RunningAvgSamplesPerSec=29.97127412349058, CurrSamplesPerSec=33.9873329076615, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:02:39,415] [INFO] [timer.py:197:stop] 0/2561, RunningAvgSamplesPerSec=29.971251734221376, CurrSamplesPerSec=29.914089257468564, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:02:45,863] [INFO] [timer.py:197:stop] 0/2562, RunningAvgSamplesPerSec=29.97119214109778, CurrSamplesPerSec=29.819465651102124, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:02:52,225] [INFO] [timer.py:197:stop] 0/2563, RunningAvgSamplesPerSec=29.970590437986925, CurrSamplesPerSec=28.505556362610747, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:02:57,976] [INFO] [timer.py:197:stop] 0/2564, RunningAvgSamplesPerSec=29.970903895093628, CurrSamplesPerSec=30.795770018564504, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:03:03,875] [INFO] [timer.py:197:stop] 0/2565, RunningAvgSamplesPerSec=29.970556256442396, CurrSamplesPerSec=29.105619819566822, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:03:10,051] [INFO] [timer.py:197:stop] 0/2566, RunningAvgSamplesPerSec=29.97040201841346, CurrSamplesPerSec=29.58023825035064, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:03:16,446] [INFO] [timer.py:197:stop] 0/2567, RunningAvgSamplesPerSec=29.9703664647348, CurrSamplesPerSec=29.87948337583325, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:03:22,377] [INFO] [timer.py:197:stop] 0/2568, RunningAvgSamplesPerSec=29.97028175625435, CurrSamplesPerSec=29.75456897875168, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:03:28,672] [INFO] [timer.py:197:stop] 0/2569, RunningAvgSamplesPerSec=29.970020971047123, CurrSamplesPerSec=29.315466802434827, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:03:34,739] [INFO] [logging.py:68:log_dist] [Rank 0] step=2570, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:03:34,740] [INFO] [timer.py:197:stop] 0/2570, RunningAvgSamplesPerSec=29.96997012486843, CurrSamplesPerSec=29.840014175469456, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:03:40,895] [INFO] [timer.py:197:stop] 0/2571, RunningAvgSamplesPerSec=29.969707806900818, CurrSamplesPerSec=29.31088936972004, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:03:47,535] [INFO] [timer.py:197:stop] 0/2572, RunningAvgSamplesPerSec=29.96950061145781, CurrSamplesPerSec=29.44650795076852, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:03:53,585] [INFO] [timer.py:197:stop] 0/2573, RunningAvgSamplesPerSec=29.969471707740546, CurrSamplesPerSec=29.89537288772095, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:03:59,777] [INFO] [timer.py:197:stop] 0/2574, RunningAvgSamplesPerSec=29.969308564371843, CurrSamplesPerSec=29.55565853894319, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:04:06,064] [INFO] [timer.py:197:stop] 0/2575, RunningAvgSamplesPerSec=29.969203701231173, CurrSamplesPerSec=29.701902219735835, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 64.0} +[2022-12-14 21:04:12,392] [INFO] [timer.py:197:stop] 0/2576, RunningAvgSamplesPerSec=29.96922677447503, CurrSamplesPerSec=30.028712114034075, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:04:18,652] [INFO] [timer.py:197:stop] 0/2577, RunningAvgSamplesPerSec=29.969149111688395, CurrSamplesPerSec=29.770570199691555, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:04:24,458] [INFO] [timer.py:197:stop] 0/2578, RunningAvgSamplesPerSec=29.969421902050154, CurrSamplesPerSec=30.688722892398218, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:04:30,532] [INFO] [timer.py:197:stop] 0/2579, RunningAvgSamplesPerSec=29.96962123499709, CurrSamplesPerSec=30.49205749333642, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:04:36,337] [INFO] [logging.py:68:log_dist] [Rank 0] step=2580, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:04:36,337] [INFO] [timer.py:197:stop] 0/2580, RunningAvgSamplesPerSec=29.969740146753683, CurrSamplesPerSec=30.279342605439183, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:04:42,407] [INFO] [timer.py:197:stop] 0/2581, RunningAvgSamplesPerSec=29.969740114317112, CurrSamplesPerSec=29.96965649306658, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:04:48,290] [INFO] [timer.py:197:stop] 0/2582, RunningAvgSamplesPerSec=29.96959590886051, CurrSamplesPerSec=29.602250356084355, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:04:54,857] [INFO] [timer.py:197:stop] 0/2583, RunningAvgSamplesPerSec=29.969613263710503, CurrSamplesPerSec=30.014455798761542, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:01,065] [INFO] [timer.py:197:stop] 0/2584, RunningAvgSamplesPerSec=29.96975139771289, CurrSamplesPerSec=30.33056925467274, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:07,392] [INFO] [timer.py:197:stop] 0/2585, RunningAvgSamplesPerSec=29.96949565230987, CurrSamplesPerSec=29.323402271367755, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:13,382] [INFO] [timer.py:197:stop] 0/2586, RunningAvgSamplesPerSec=29.96947726524469, CurrSamplesPerSec=29.92205865108131, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:19,574] [INFO] [timer.py:197:stop] 0/2587, RunningAvgSamplesPerSec=29.969290146773602, CurrSamplesPerSec=29.493455932554607, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:25,814] [INFO] [timer.py:197:stop] 0/2588, RunningAvgSamplesPerSec=29.969305231138414, CurrSamplesPerSec=30.008349133891663, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:31,784] [INFO] [timer.py:197:stop] 0/2589, RunningAvgSamplesPerSec=29.96924233937472, CurrSamplesPerSec=29.80748242212658, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:37,690] [INFO] [logging.py:68:log_dist] [Rank 0] step=2590, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:05:37,690] [INFO] [timer.py:197:stop] 0/2590, RunningAvgSamplesPerSec=29.968978490582245, CurrSamplesPerSec=29.30160765330254, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:43,567] [INFO] [timer.py:197:stop] 0/2591, RunningAvgSamplesPerSec=29.969174689686366, CurrSamplesPerSec=30.485692639642995, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:49,528] [INFO] [timer.py:197:stop] 0/2592, RunningAvgSamplesPerSec=29.96882696689465, CurrSamplesPerSec=29.09483718310299, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:51,919] [INFO] [timer.py:197:stop] 0/2593, RunningAvgSamplesPerSec=29.969150983186193, CurrSamplesPerSec=30.832539285907497, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:54,079] [INFO] [timer.py:197:stop] 0/2594, RunningAvgSamplesPerSec=29.96931483369047, CurrSamplesPerSec=30.39995415694474, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:56,206] [INFO] [timer.py:197:stop] 0/2595, RunningAvgSamplesPerSec=29.96949586225509, CurrSamplesPerSec=30.446188252816977, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:05:58,373] [INFO] [timer.py:197:stop] 0/2596, RunningAvgSamplesPerSec=29.969462819466653, CurrSamplesPerSec=29.88402721580999, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:06:00,498] [INFO] [timer.py:197:stop] 0/2597, RunningAvgSamplesPerSec=29.969655857394407, CurrSamplesPerSec=30.478908236200322, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:06:02,667] [INFO] [timer.py:197:stop] 0/2598, RunningAvgSamplesPerSec=29.969611341705747, CurrSamplesPerSec=29.854536856474475, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:06:04,840] [INFO] [timer.py:197:stop] 0/2599, RunningAvgSamplesPerSec=29.969580476198416, CurrSamplesPerSec=29.889667357690417, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:06:06,735] [INFO] [logging.py:68:log_dist] [Rank 0] step=2600, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:06:06,735] [INFO] [timer.py:197:stop] 0/2600, RunningAvgSamplesPerSec=29.970994172984867, CurrSamplesPerSec=34.15510502389078, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 64.01} +[2022-12-14 21:06:44,578] [INFO] [timer.py:197:stop] 0/2601, RunningAvgSamplesPerSec=29.970835540570018, CurrSamplesPerSec=29.564300909852246, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:06:50,794] [INFO] [timer.py:197:stop] 0/2602, RunningAvgSamplesPerSec=29.97075932569012, CurrSamplesPerSec=29.773977919144823, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:06:56,609] [INFO] [timer.py:197:stop] 0/2603, RunningAvgSamplesPerSec=29.970777545875144, CurrSamplesPerSec=30.01822505237958, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:07:02,573] [INFO] [timer.py:197:stop] 0/2604, RunningAvgSamplesPerSec=29.971115538441047, CurrSamplesPerSec=30.876810437283954, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:07:08,495] [INFO] [timer.py:197:stop] 0/2605, RunningAvgSamplesPerSec=29.97133076681948, CurrSamplesPerSec=30.542022672977396, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:07:14,527] [INFO] [timer.py:197:stop] 0/2606, RunningAvgSamplesPerSec=29.971361045520187, CurrSamplesPerSec=30.05038439037234, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:07:20,399] [INFO] [timer.py:197:stop] 0/2607, RunningAvgSamplesPerSec=29.971395134267137, CurrSamplesPerSec=30.060426017781687, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:07:26,300] [INFO] [timer.py:197:stop] 0/2608, RunningAvgSamplesPerSec=29.970933721559806, CurrSamplesPerSec=28.815317247049368, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:07:32,418] [INFO] [timer.py:197:stop] 0/2609, RunningAvgSamplesPerSec=29.971044313475797, CurrSamplesPerSec=30.26204620395075, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:07:38,494] [INFO] [logging.py:68:log_dist] [Rank 0] step=2610, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:07:38,495] [INFO] [timer.py:197:stop] 0/2610, RunningAvgSamplesPerSec=29.971113693062563, CurrSamplesPerSec=30.153084874328005, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:07:44,770] [INFO] [timer.py:197:stop] 0/2611, RunningAvgSamplesPerSec=29.97111400411118, CurrSamplesPerSec=29.971925240865854, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:07:50,940] [INFO] [timer.py:197:stop] 0/2612, RunningAvgSamplesPerSec=29.971154988644294, CurrSamplesPerSec=30.078466640058284, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:07:56,712] [INFO] [timer.py:197:stop] 0/2613, RunningAvgSamplesPerSec=29.971147369620926, CurrSamplesPerSec=29.95127490891117, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:08:02,873] [INFO] [timer.py:197:stop] 0/2614, RunningAvgSamplesPerSec=29.971157753771323, CurrSamplesPerSec=29.998295329522502, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:08:08,904] [INFO] [timer.py:197:stop] 0/2615, RunningAvgSamplesPerSec=29.971052647170197, CurrSamplesPerSec=29.699007128701084, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:08:14,963] [INFO] [timer.py:197:stop] 0/2616, RunningAvgSamplesPerSec=29.970900817531483, CurrSamplesPerSec=29.579354926863026, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:08:20,867] [INFO] [timer.py:197:stop] 0/2617, RunningAvgSamplesPerSec=29.97089166443031, CurrSamplesPerSec=29.946984550668816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:08:27,033] [INFO] [timer.py:197:stop] 0/2618, RunningAvgSamplesPerSec=29.971044584944075, CurrSamplesPerSec=30.376341449255747, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:08:33,179] [INFO] [timer.py:197:stop] 0/2619, RunningAvgSamplesPerSec=29.97078923722228, CurrSamplesPerSec=29.317368617870834, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:08:39,311] [INFO] [logging.py:68:log_dist] [Rank 0] step=2620, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:08:39,312] [INFO] [timer.py:197:stop] 0/2620, RunningAvgSamplesPerSec=29.970360238193873, CurrSamplesPerSec=28.888222189530566, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:08:45,316] [INFO] [timer.py:197:stop] 0/2621, RunningAvgSamplesPerSec=29.9704422791156, CurrSamplesPerSec=30.186776362287933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:08:51,479] [INFO] [timer.py:197:stop] 0/2622, RunningAvgSamplesPerSec=29.970421147455884, CurrSamplesPerSec=29.91517937988586, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:08:57,589] [INFO] [timer.py:197:stop] 0/2623, RunningAvgSamplesPerSec=29.970262960499195, CurrSamplesPerSec=29.56146838981537, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:09:03,697] [INFO] [timer.py:197:stop] 0/2624, RunningAvgSamplesPerSec=29.97038676450003, CurrSamplesPerSec=30.298430141375878, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:09:09,659] [INFO] [timer.py:197:stop] 0/2625, RunningAvgSamplesPerSec=29.970180816277825, CurrSamplesPerSec=29.43974549170219, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 65.0} +[2022-12-14 21:09:15,628] [INFO] [timer.py:197:stop] 0/2626, RunningAvgSamplesPerSec=29.970217914544456, CurrSamplesPerSec=30.067843765090043, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:09:21,756] [INFO] [timer.py:197:stop] 0/2627, RunningAvgSamplesPerSec=29.97003059513881, CurrSamplesPerSec=29.486438688830862, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:09:27,715] [INFO] [timer.py:197:stop] 0/2628, RunningAvgSamplesPerSec=29.97021691992075, CurrSamplesPerSec=30.467437006622387, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:09:33,494] [INFO] [timer.py:197:stop] 0/2629, RunningAvgSamplesPerSec=29.969550432041046, CurrSamplesPerSec=28.315958449815327, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:09:39,443] [INFO] [logging.py:68:log_dist] [Rank 0] step=2630, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:09:39,444] [INFO] [timer.py:197:stop] 0/2630, RunningAvgSamplesPerSec=29.969743053422825, CurrSamplesPerSec=30.4844532221411, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:09:45,497] [INFO] [timer.py:197:stop] 0/2631, RunningAvgSamplesPerSec=29.96939145771392, CurrSamplesPerSec=29.073043956123698, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:09:51,384] [INFO] [timer.py:197:stop] 0/2632, RunningAvgSamplesPerSec=29.9693104792257, CurrSamplesPerSec=29.757920253764727, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:09:53,909] [INFO] [timer.py:197:stop] 0/2633, RunningAvgSamplesPerSec=29.969271108507577, CurrSamplesPerSec=29.866082775087143, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:09:56,036] [INFO] [timer.py:197:stop] 0/2634, RunningAvgSamplesPerSec=29.969459040725468, CurrSamplesPerSec=30.472206411713223, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:09:58,204] [INFO] [timer.py:197:stop] 0/2635, RunningAvgSamplesPerSec=29.969423971332866, CurrSamplesPerSec=29.877404846801692, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:10:00,287] [INFO] [timer.py:197:stop] 0/2636, RunningAvgSamplesPerSec=29.96983961367916, CurrSamplesPerSec=31.10571975316167, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:10:02,393] [INFO] [timer.py:197:stop] 0/2637, RunningAvgSamplesPerSec=29.970132451700785, CurrSamplesPerSec=30.76185186182602, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:10:04,531] [INFO] [timer.py:197:stop] 0/2638, RunningAvgSamplesPerSec=29.970256601186623, CurrSamplesPerSec=30.301002047305087, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:10:06,696] [INFO] [timer.py:197:stop] 0/2639, RunningAvgSamplesPerSec=29.970239282667286, CurrSamplesPerSec=29.924657124221202, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:10:08,599] [INFO] [logging.py:68:log_dist] [Rank 0] step=2640, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:10:08,600] [INFO] [timer.py:197:stop] 0/2640, RunningAvgSamplesPerSec=29.97173958545306, CurrSamplesPerSec=34.52995502437102, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:10:46,902] [INFO] [timer.py:197:stop] 0/2641, RunningAvgSamplesPerSec=29.97205657877104, CurrSamplesPerSec=30.832294929147924, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:10:53,337] [INFO] [timer.py:197:stop] 0/2642, RunningAvgSamplesPerSec=29.972283338482182, CurrSamplesPerSec=30.582898215010392, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:10:59,605] [INFO] [timer.py:197:stop] 0/2643, RunningAvgSamplesPerSec=29.972278062022593, CurrSamplesPerSec=29.958354682165872, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:11:05,610] [INFO] [timer.py:197:stop] 0/2644, RunningAvgSamplesPerSec=29.97176087114447, CurrSamplesPerSec=28.665416262121926, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:11:11,567] [INFO] [timer.py:197:stop] 0/2645, RunningAvgSamplesPerSec=29.971792893697724, CurrSamplesPerSec=30.056636063109703, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:11:17,860] [INFO] [timer.py:197:stop] 0/2646, RunningAvgSamplesPerSec=29.971866610341255, CurrSamplesPerSec=30.167974990410794, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:11:23,891] [INFO] [timer.py:197:stop] 0/2647, RunningAvgSamplesPerSec=29.972245827820075, CurrSamplesPerSec=31.009612634355314, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:11:29,777] [INFO] [timer.py:197:stop] 0/2648, RunningAvgSamplesPerSec=29.97263078740606, CurrSamplesPerSec=31.026669818239398, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:11:35,968] [INFO] [timer.py:197:stop] 0/2649, RunningAvgSamplesPerSec=29.972606565863263, CurrSamplesPerSec=29.90865316639438, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:11:41,897] [INFO] [logging.py:68:log_dist] [Rank 0] step=2650, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:11:41,898] [INFO] [timer.py:197:stop] 0/2650, RunningAvgSamplesPerSec=29.972662749110643, CurrSamplesPerSec=30.12212166383215, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0003, 'learning_rate': 1e-05, 'epoch': 66.0} +[2022-12-14 21:11:48,608] [INFO] [timer.py:197:stop] 0/2651, RunningAvgSamplesPerSec=29.97227307384091, CurrSamplesPerSec=28.974767217848516, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:11:54,526] [INFO] [timer.py:197:stop] 0/2652, RunningAvgSamplesPerSec=29.971858911074875, CurrSamplesPerSec=28.91349758856228, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:12:00,806] [INFO] [timer.py:197:stop] 0/2653, RunningAvgSamplesPerSec=29.97179504394757, CurrSamplesPerSec=29.803497873394942, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:12:06,742] [INFO] [timer.py:197:stop] 0/2654, RunningAvgSamplesPerSec=29.97163218871084, CurrSamplesPerSec=29.54603581224191, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:12:12,930] [INFO] [timer.py:197:stop] 0/2655, RunningAvgSamplesPerSec=29.971834721954295, CurrSamplesPerSec=30.518757859617022, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:12:18,917] [INFO] [timer.py:197:stop] 0/2656, RunningAvgSamplesPerSec=29.971867597892707, CurrSamplesPerSec=30.05934211433941, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:12:24,863] [INFO] [timer.py:197:stop] 0/2657, RunningAvgSamplesPerSec=29.97190678918133, CurrSamplesPerSec=30.07628282940552, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:12:30,628] [INFO] [timer.py:197:stop] 0/2658, RunningAvgSamplesPerSec=29.971998255234794, CurrSamplesPerSec=30.21682503639836, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:12:36,562] [INFO] [timer.py:197:stop] 0/2659, RunningAvgSamplesPerSec=29.972075368868005, CurrSamplesPerSec=30.178298935478573, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:12:42,345] [INFO] [logging.py:68:log_dist] [Rank 0] step=2660, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:12:42,345] [INFO] [timer.py:197:stop] 0/2660, RunningAvgSamplesPerSec=29.972077500594047, CurrSamplesPerSec=29.977742567640913, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:12:48,317] [INFO] [timer.py:197:stop] 0/2661, RunningAvgSamplesPerSec=29.97206167100976, CurrSamplesPerSec=29.930045640635797, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:12:54,389] [INFO] [timer.py:197:stop] 0/2662, RunningAvgSamplesPerSec=29.971937297037677, CurrSamplesPerSec=29.644837482381377, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:13:00,579] [INFO] [timer.py:197:stop] 0/2663, RunningAvgSamplesPerSec=29.971641491215397, CurrSamplesPerSec=29.204933893941778, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:13:07,124] [INFO] [timer.py:197:stop] 0/2664, RunningAvgSamplesPerSec=29.97132831052104, CurrSamplesPerSec=29.16050837303357, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:13:13,328] [INFO] [timer.py:197:stop] 0/2665, RunningAvgSamplesPerSec=29.97138532732674, CurrSamplesPerSec=30.12393689397472, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:13:19,242] [INFO] [timer.py:197:stop] 0/2666, RunningAvgSamplesPerSec=29.97106253354665, CurrSamplesPerSec=29.135438231803665, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:13:25,281] [INFO] [timer.py:197:stop] 0/2667, RunningAvgSamplesPerSec=29.97120087221405, CurrSamplesPerSec=30.344324840343127, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:13:31,652] [INFO] [timer.py:197:stop] 0/2668, RunningAvgSamplesPerSec=29.97140962557201, CurrSamplesPerSec=30.538263177983044, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:13:37,532] [INFO] [timer.py:197:stop] 0/2669, RunningAvgSamplesPerSec=29.971206791868227, CurrSamplesPerSec=29.440039306693105, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:13:43,686] [INFO] [logging.py:68:log_dist] [Rank 0] step=2670, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:13:43,687] [INFO] [timer.py:197:stop] 0/2670, RunningAvgSamplesPerSec=29.971202394096437, CurrSamplesPerSec=29.95947812661029, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:13:49,850] [INFO] [timer.py:197:stop] 0/2671, RunningAvgSamplesPerSec=29.970942635326114, CurrSamplesPerSec=29.293575285511885, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:13:55,920] [INFO] [timer.py:197:stop] 0/2672, RunningAvgSamplesPerSec=29.971022381569718, CurrSamplesPerSec=30.185388018793432, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:13:58,368] [INFO] [timer.py:197:stop] 0/2673, RunningAvgSamplesPerSec=29.971048453820575, CurrSamplesPerSec=30.040823488821065, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:14:00,492] [INFO] [timer.py:197:stop] 0/2674, RunningAvgSamplesPerSec=29.971244148141214, CurrSamplesPerSec=30.503224914979516, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:14:02,665] [INFO] [timer.py:197:stop] 0/2675, RunningAvgSamplesPerSec=29.971192261266857, CurrSamplesPerSec=29.83318915122229, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 66.01} +[2022-12-14 21:14:04,821] [INFO] [timer.py:197:stop] 0/2676, RunningAvgSamplesPerSec=29.971267567585244, CurrSamplesPerSec=30.17392294503552, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:14:06,992] [INFO] [timer.py:197:stop] 0/2677, RunningAvgSamplesPerSec=29.971216029664806, CurrSamplesPerSec=29.83403464803212, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:14:09,112] [INFO] [timer.py:197:stop] 0/2678, RunningAvgSamplesPerSec=29.971434713132115, CurrSamplesPerSec=30.568062234002454, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:14:11,266] [INFO] [timer.py:197:stop] 0/2679, RunningAvgSamplesPerSec=29.971476020514963, CurrSamplesPerSec=30.08242391967485, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:14:13,226] [INFO] [logging.py:68:log_dist] [Rank 0] step=2680, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:14:13,227] [INFO] [timer.py:197:stop] 0/2680, RunningAvgSamplesPerSec=29.972653812225794, CurrSamplesPerSec=33.49642038694236, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:14:54,302] [INFO] [timer.py:197:stop] 0/2681, RunningAvgSamplesPerSec=29.97287490934995, CurrSamplesPerSec=30.576909850467448, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:15:01,338] [INFO] [timer.py:197:stop] 0/2682, RunningAvgSamplesPerSec=29.972649106417474, CurrSamplesPerSec=29.37969487678794, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:15:08,105] [INFO] [timer.py:197:stop] 0/2683, RunningAvgSamplesPerSec=29.972783408321284, CurrSamplesPerSec=30.337088916954148, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:15:15,370] [INFO] [timer.py:197:stop] 0/2684, RunningAvgSamplesPerSec=29.97298500858977, CurrSamplesPerSec=30.523404515639132, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:15:21,860] [INFO] [timer.py:197:stop] 0/2685, RunningAvgSamplesPerSec=29.97292598799605, CurrSamplesPerSec=29.815464651626655, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:15:28,081] [INFO] [timer.py:197:stop] 0/2686, RunningAvgSamplesPerSec=29.97305473843145, CurrSamplesPerSec=30.322521235386905, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:15:34,257] [INFO] [timer.py:197:stop] 0/2687, RunningAvgSamplesPerSec=29.973083139634348, CurrSamplesPerSec=30.049506403539105, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:15:40,919] [INFO] [timer.py:197:stop] 0/2688, RunningAvgSamplesPerSec=29.972674861938497, CurrSamplesPerSec=28.91514203602928, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:15:47,679] [INFO] [timer.py:197:stop] 0/2689, RunningAvgSamplesPerSec=29.972581306560727, CurrSamplesPerSec=29.72338162671057, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:15:54,059] [INFO] [logging.py:68:log_dist] [Rank 0] step=2690, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:15:54,060] [INFO] [timer.py:197:stop] 0/2690, RunningAvgSamplesPerSec=29.972329326146514, CurrSamplesPerSec=29.31022048004979, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:16:00,885] [INFO] [timer.py:197:stop] 0/2691, RunningAvgSamplesPerSec=29.971928109463065, CurrSamplesPerSec=28.9309295236921, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:16:08,259] [INFO] [timer.py:197:stop] 0/2692, RunningAvgSamplesPerSec=29.97155674968283, CurrSamplesPerSec=29.0051798310531, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:16:14,694] [INFO] [timer.py:197:stop] 0/2693, RunningAvgSamplesPerSec=29.97153106670369, CurrSamplesPerSec=29.902602798063896, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:16:21,030] [INFO] [timer.py:197:stop] 0/2694, RunningAvgSamplesPerSec=29.971407509920226, CurrSamplesPerSec=29.642565607261602, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:16:27,365] [INFO] [timer.py:197:stop] 0/2695, RunningAvgSamplesPerSec=29.971212033174005, CurrSamplesPerSec=29.45407176188453, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:16:33,781] [INFO] [timer.py:197:stop] 0/2696, RunningAvgSamplesPerSec=29.970978285202044, CurrSamplesPerSec=29.354448831477637, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:16:40,332] [INFO] [timer.py:197:stop] 0/2697, RunningAvgSamplesPerSec=29.970960025313946, CurrSamplesPerSec=29.921848524709965, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:16:46,744] [INFO] [timer.py:197:stop] 0/2698, RunningAvgSamplesPerSec=29.97096517186806, CurrSamplesPerSec=29.984841559309643, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:16:52,926] [INFO] [timer.py:197:stop] 0/2699, RunningAvgSamplesPerSec=29.970694638364318, CurrSamplesPerSec=29.258670360358387, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:16:59,394] [INFO] [logging.py:68:log_dist] [Rank 0] step=2700, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:16:59,395] [INFO] [timer.py:197:stop] 0/2700, RunningAvgSamplesPerSec=29.970797342866597, CurrSamplesPerSec=30.250376248522475, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 67.0} +[2022-12-14 21:17:05,733] [INFO] [timer.py:197:stop] 0/2701, RunningAvgSamplesPerSec=29.97080364982679, CurrSamplesPerSec=29.987829498585707, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:17:12,102] [INFO] [timer.py:197:stop] 0/2702, RunningAvgSamplesPerSec=29.970776137781513, CurrSamplesPerSec=29.896704713477266, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:17:18,931] [INFO] [timer.py:197:stop] 0/2703, RunningAvgSamplesPerSec=29.970748962073912, CurrSamplesPerSec=29.89755381433599, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:17:24,852] [INFO] [timer.py:197:stop] 0/2704, RunningAvgSamplesPerSec=29.970922090670054, CurrSamplesPerSec=30.445956887913226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:17:31,989] [INFO] [timer.py:197:stop] 0/2705, RunningAvgSamplesPerSec=29.970700729722278, CurrSamplesPerSec=29.384290610342976, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:17:38,630] [INFO] [timer.py:197:stop] 0/2706, RunningAvgSamplesPerSec=29.970679390094325, CurrSamplesPerSec=29.913109215230048, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:17:45,209] [INFO] [timer.py:197:stop] 0/2707, RunningAvgSamplesPerSec=29.97087318618919, CurrSamplesPerSec=30.5042266751046, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:17:51,736] [INFO] [timer.py:197:stop] 0/2708, RunningAvgSamplesPerSec=29.970826122778245, CurrSamplesPerSec=29.844058264717187, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:17:58,736] [INFO] [timer.py:197:stop] 0/2709, RunningAvgSamplesPerSec=29.97061926708172, CurrSamplesPerSec=29.421134100673406, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:18:05,630] [INFO] [logging.py:68:log_dist] [Rank 0] step=2710, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:18:05,631] [INFO] [timer.py:197:stop] 0/2710, RunningAvgSamplesPerSec=29.970576922260435, CurrSamplesPerSec=29.856386392936226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:18:11,758] [INFO] [timer.py:197:stop] 0/2711, RunningAvgSamplesPerSec=29.97050367183333, CurrSamplesPerSec=29.773446237134586, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:18:18,125] [INFO] [timer.py:197:stop] 0/2712, RunningAvgSamplesPerSec=29.970733491145992, CurrSamplesPerSec=30.60652617344676, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:18:20,585] [INFO] [timer.py:197:stop] 0/2713, RunningAvgSamplesPerSec=29.970929089251243, CurrSamplesPerSec=30.510547253276837, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:18:22,756] [INFO] [timer.py:197:stop] 0/2714, RunningAvgSamplesPerSec=29.970878946765268, CurrSamplesPerSec=29.835556663064793, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:18:24,869] [INFO] [timer.py:197:stop] 0/2715, RunningAvgSamplesPerSec=29.971128126657547, CurrSamplesPerSec=30.662498457939442, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:18:27,041] [INFO] [timer.py:197:stop] 0/2716, RunningAvgSamplesPerSec=29.971076654959532, CurrSamplesPerSec=29.83208178867495, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:18:29,197] [INFO] [timer.py:197:stop] 0/2717, RunningAvgSamplesPerSec=29.971219926049514, CurrSamplesPerSec=30.36517054623618, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:18:31,383] [INFO] [timer.py:197:stop] 0/2718, RunningAvgSamplesPerSec=29.971127359817068, CurrSamplesPerSec=29.721900652721438, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:18:33,512] [INFO] [timer.py:197:stop] 0/2719, RunningAvgSamplesPerSec=29.971289547069517, CurrSamplesPerSec=30.418363375054096, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:18:35,429] [INFO] [logging.py:68:log_dist] [Rank 0] step=2720, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:18:35,429] [INFO] [timer.py:197:stop] 0/2720, RunningAvgSamplesPerSec=29.97253677311293, CurrSamplesPerSec=33.793397788285134, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:19:19,895] [INFO] [timer.py:197:stop] 0/2721, RunningAvgSamplesPerSec=29.97190715334369, CurrSamplesPerSec=28.35306565054139, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:19:26,758] [INFO] [timer.py:197:stop] 0/2722, RunningAvgSamplesPerSec=29.971302589078217, CurrSamplesPerSec=28.41299115090333, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:19:34,214] [INFO] [timer.py:197:stop] 0/2723, RunningAvgSamplesPerSec=29.971127612110433, CurrSamplesPerSec=29.502632630283795, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:19:40,574] [INFO] [timer.py:197:stop] 0/2724, RunningAvgSamplesPerSec=29.9711265503812, CurrSamplesPerSec=29.968237863685303, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:19:47,178] [INFO] [timer.py:197:stop] 0/2725, RunningAvgSamplesPerSec=29.97125679613111, CurrSamplesPerSec=30.330031216371896, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 68.0} +[2022-12-14 21:19:54,036] [INFO] [timer.py:197:stop] 0/2726, RunningAvgSamplesPerSec=29.971224956491113, CurrSamplesPerSec=29.88477578468347, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:20:01,123] [INFO] [timer.py:197:stop] 0/2727, RunningAvgSamplesPerSec=29.97146205096314, CurrSamplesPerSec=30.631536341788188, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:20:07,808] [INFO] [timer.py:197:stop] 0/2728, RunningAvgSamplesPerSec=29.971240355679324, CurrSamplesPerSec=29.37906142794681, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:20:15,273] [INFO] [timer.py:197:stop] 0/2729, RunningAvgSamplesPerSec=29.971026943496458, CurrSamplesPerSec=29.400346755600825, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:20:21,939] [INFO] [logging.py:68:log_dist] [Rank 0] step=2730, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:20:21,939] [INFO] [timer.py:197:stop] 0/2730, RunningAvgSamplesPerSec=29.971260706427383, CurrSamplesPerSec=30.622590714572294, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:20:29,149] [INFO] [timer.py:197:stop] 0/2731, RunningAvgSamplesPerSec=29.971053946652866, CurrSamplesPerSec=29.417435923062744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:20:35,875] [INFO] [timer.py:197:stop] 0/2732, RunningAvgSamplesPerSec=29.97104280627684, CurrSamplesPerSec=29.94067153945105, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:20:41,988] [INFO] [timer.py:197:stop] 0/2733, RunningAvgSamplesPerSec=29.9709408356332, CurrSamplesPerSec=29.695123795385143, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:20:49,130] [INFO] [timer.py:197:stop] 0/2734, RunningAvgSamplesPerSec=29.97101176662766, CurrSamplesPerSec=30.16598495001581, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:20:55,380] [INFO] [timer.py:197:stop] 0/2735, RunningAvgSamplesPerSec=29.970985467731808, CurrSamplesPerSec=29.899308775624633, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:21:02,383] [INFO] [timer.py:197:stop] 0/2736, RunningAvgSamplesPerSec=29.970837001909793, CurrSamplesPerSec=29.570501790519565, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:21:08,734] [INFO] [timer.py:197:stop] 0/2737, RunningAvgSamplesPerSec=29.97099948744266, CurrSamplesPerSec=30.42192101749018, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:21:15,315] [INFO] [timer.py:197:stop] 0/2738, RunningAvgSamplesPerSec=29.9708417991751, CurrSamplesPerSec=29.54568459392762, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:21:21,844] [INFO] [timer.py:197:stop] 0/2739, RunningAvgSamplesPerSec=29.970931796945457, CurrSamplesPerSec=30.219206204430918, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:21:28,541] [INFO] [logging.py:68:log_dist] [Rank 0] step=2740, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:21:28,542] [INFO] [timer.py:197:stop] 0/2740, RunningAvgSamplesPerSec=29.97079583116385, CurrSamplesPerSec=29.603223193157135, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:21:35,621] [INFO] [timer.py:197:stop] 0/2741, RunningAvgSamplesPerSec=29.970458025712038, CurrSamplesPerSec=29.073245478889753, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:21:42,378] [INFO] [timer.py:197:stop] 0/2742, RunningAvgSamplesPerSec=29.970423308052602, CurrSamplesPerSec=29.875632505692483, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:21:49,565] [INFO] [timer.py:197:stop] 0/2743, RunningAvgSamplesPerSec=29.97029132396976, CurrSamplesPerSec=29.612968149083144, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:21:56,268] [INFO] [timer.py:197:stop] 0/2744, RunningAvgSamplesPerSec=29.9701092240146, CurrSamplesPerSec=29.479152843275305, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:22:03,189] [INFO] [timer.py:197:stop] 0/2745, RunningAvgSamplesPerSec=29.969895239290246, CurrSamplesPerSec=29.394419805718037, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:22:09,453] [INFO] [timer.py:197:stop] 0/2746, RunningAvgSamplesPerSec=29.969910778911807, CurrSamplesPerSec=30.012596693706115, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:22:16,134] [INFO] [timer.py:197:stop] 0/2747, RunningAvgSamplesPerSec=29.970003611372952, CurrSamplesPerSec=30.226920363598488, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:22:22,886] [INFO] [timer.py:197:stop] 0/2748, RunningAvgSamplesPerSec=29.96986693685372, CurrSamplesPerSec=29.5993354922704, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:22:29,607] [INFO] [timer.py:197:stop] 0/2749, RunningAvgSamplesPerSec=29.96951361895953, CurrSamplesPerSec=29.029737383708486, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:22:36,372] [INFO] [logging.py:68:log_dist] [Rank 0] step=2750, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:22:36,373] [INFO] [timer.py:197:stop] 0/2750, RunningAvgSamplesPerSec=29.969596589386487, CurrSamplesPerSec=30.199263613298882, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 68.01} +[2022-12-14 21:22:42,715] [INFO] [timer.py:197:stop] 0/2751, RunningAvgSamplesPerSec=29.96963938097504, CurrSamplesPerSec=30.087694043636414, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:22:49,625] [INFO] [timer.py:197:stop] 0/2752, RunningAvgSamplesPerSec=29.96961770283869, CurrSamplesPerSec=29.91014281209574, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:22:52,002] [INFO] [timer.py:197:stop] 0/2753, RunningAvgSamplesPerSec=29.969762207509252, CurrSamplesPerSec=30.372492040437148, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:22:54,114] [INFO] [timer.py:197:stop] 0/2754, RunningAvgSamplesPerSec=29.970014187960377, CurrSamplesPerSec=30.679631632465416, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:22:56,252] [INFO] [timer.py:197:stop] 0/2755, RunningAvgSamplesPerSec=29.97013677381508, CurrSamplesPerSec=30.31133511134839, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:22:58,422] [INFO] [timer.py:197:stop] 0/2756, RunningAvgSamplesPerSec=29.970089687412838, CurrSamplesPerSec=29.841019289715245, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:23:00,537] [INFO] [timer.py:197:stop] 0/2757, RunningAvgSamplesPerSec=29.970321998845577, CurrSamplesPerSec=30.62406847933251, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:23:02,650] [INFO] [timer.py:197:stop] 0/2758, RunningAvgSamplesPerSec=29.97058109082788, CurrSamplesPerSec=30.70180102210684, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:23:04,826] [INFO] [timer.py:197:stop] 0/2759, RunningAvgSamplesPerSec=29.970507468394352, CurrSamplesPerSec=29.768968973054104, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:23:06,767] [INFO] [logging.py:68:log_dist] [Rank 0] step=2760, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:23:06,767] [INFO] [timer.py:197:stop] 0/2760, RunningAvgSamplesPerSec=29.971632484179324, CurrSamplesPerSec=33.43147937672226, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:23:49,673] [INFO] [timer.py:197:stop] 0/2761, RunningAvgSamplesPerSec=29.971749294689996, CurrSamplesPerSec=30.297414495601135, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:23:56,485] [INFO] [timer.py:197:stop] 0/2762, RunningAvgSamplesPerSec=29.97196536120027, CurrSamplesPerSec=30.580194625659683, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:24:03,776] [INFO] [timer.py:197:stop] 0/2763, RunningAvgSamplesPerSec=29.972192830261253, CurrSamplesPerSec=30.61344432396333, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:24:10,555] [INFO] [timer.py:197:stop] 0/2764, RunningAvgSamplesPerSec=29.97235497159492, CurrSamplesPerSec=30.426817583697552, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:24:17,041] [INFO] [timer.py:197:stop] 0/2765, RunningAvgSamplesPerSec=29.97246151714901, CurrSamplesPerSec=30.26965937373987, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:24:23,721] [INFO] [timer.py:197:stop] 0/2766, RunningAvgSamplesPerSec=29.972466134782625, CurrSamplesPerSec=29.98523009172213, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:24:30,230] [INFO] [timer.py:197:stop] 0/2767, RunningAvgSamplesPerSec=29.972457754792526, CurrSamplesPerSec=29.94931335429062, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:24:37,265] [INFO] [timer.py:197:stop] 0/2768, RunningAvgSamplesPerSec=29.972209576562, CurrSamplesPerSec=29.301361372764294, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:24:44,440] [INFO] [timer.py:197:stop] 0/2769, RunningAvgSamplesPerSec=29.97154268049546, CurrSamplesPerSec=28.233892582966337, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:24:50,856] [INFO] [logging.py:68:log_dist] [Rank 0] step=2770, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:24:50,857] [INFO] [timer.py:197:stop] 0/2770, RunningAvgSamplesPerSec=29.97157567708467, CurrSamplesPerSec=30.063156319747755, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:24:57,811] [INFO] [timer.py:197:stop] 0/2771, RunningAvgSamplesPerSec=29.97172278025283, CurrSamplesPerSec=30.384514365879483, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:25:04,419] [INFO] [timer.py:197:stop] 0/2772, RunningAvgSamplesPerSec=29.97200732792897, CurrSamplesPerSec=30.781199803686835, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:25:10,882] [INFO] [timer.py:197:stop] 0/2773, RunningAvgSamplesPerSec=29.971904868478745, CurrSamplesPerSec=29.690755438115836, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:25:17,522] [INFO] [timer.py:197:stop] 0/2774, RunningAvgSamplesPerSec=29.971806566529693, CurrSamplesPerSec=29.701866068694972, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:25:25,307] [INFO] [timer.py:197:stop] 0/2775, RunningAvgSamplesPerSec=29.971813074166707, CurrSamplesPerSec=29.98986311167637, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 69.0} +[2022-12-14 21:25:32,043] [INFO] [timer.py:197:stop] 0/2776, RunningAvgSamplesPerSec=29.971969402763325, CurrSamplesPerSec=30.41183285489739, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:25:38,734] [INFO] [timer.py:197:stop] 0/2777, RunningAvgSamplesPerSec=29.972142303553486, CurrSamplesPerSec=30.45957196110586, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:25:45,365] [INFO] [timer.py:197:stop] 0/2778, RunningAvgSamplesPerSec=29.971899282693887, CurrSamplesPerSec=29.312361674628587, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:25:52,334] [INFO] [timer.py:197:stop] 0/2779, RunningAvgSamplesPerSec=29.971911642823066, CurrSamplesPerSec=30.006262700535583, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:25:58,986] [INFO] [logging.py:68:log_dist] [Rank 0] step=2780, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:25:58,987] [INFO] [timer.py:197:stop] 0/2780, RunningAvgSamplesPerSec=29.971921929311765, CurrSamplesPerSec=30.00051476944621, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:26:05,497] [INFO] [timer.py:197:stop] 0/2781, RunningAvgSamplesPerSec=29.97192753728724, CurrSamplesPerSec=29.98751459799137, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:26:11,912] [INFO] [timer.py:197:stop] 0/2782, RunningAvgSamplesPerSec=29.972172895136435, CurrSamplesPerSec=30.66990097695411, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:26:18,866] [INFO] [timer.py:197:stop] 0/2783, RunningAvgSamplesPerSec=29.971989084214464, CurrSamplesPerSec=29.46956368520695, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:26:25,678] [INFO] [timer.py:197:stop] 0/2784, RunningAvgSamplesPerSec=29.971892283639043, CurrSamplesPerSec=29.705087143147548, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:26:32,592] [INFO] [timer.py:197:stop] 0/2785, RunningAvgSamplesPerSec=29.9716422674092, CurrSamplesPerSec=29.291877924436065, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:26:38,974] [INFO] [timer.py:197:stop] 0/2786, RunningAvgSamplesPerSec=29.971582776585493, CurrSamplesPerSec=29.80692968436921, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:26:45,984] [INFO] [timer.py:197:stop] 0/2787, RunningAvgSamplesPerSec=29.971763067401394, CurrSamplesPerSec=30.4822446776389, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:26:52,726] [INFO] [timer.py:197:stop] 0/2788, RunningAvgSamplesPerSec=29.971682280045904, CurrSamplesPerSec=29.748366494021855, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:26:59,197] [INFO] [timer.py:197:stop] 0/2789, RunningAvgSamplesPerSec=29.97179267029622, CurrSamplesPerSec=30.28252959291961, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:27:05,685] [INFO] [logging.py:68:log_dist] [Rank 0] step=2790, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:27:05,685] [INFO] [timer.py:197:stop] 0/2790, RunningAvgSamplesPerSec=29.97174089222654, CurrSamplesPerSec=29.828127119931345, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:27:12,001] [INFO] [timer.py:197:stop] 0/2791, RunningAvgSamplesPerSec=29.97202735292439, CurrSamplesPerSec=30.792551802141542, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:27:18,640] [INFO] [timer.py:197:stop] 0/2792, RunningAvgSamplesPerSec=29.97194092979027, CurrSamplesPerSec=29.73283042307973, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:27:21,059] [INFO] [timer.py:197:stop] 0/2793, RunningAvgSamplesPerSec=29.97189089832615, CurrSamplesPerSec=29.832950431387054, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:27:23,186] [INFO] [timer.py:197:stop] 0/2794, RunningAvgSamplesPerSec=29.97206163248835, CurrSamplesPerSec=30.45628194707479, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:27:25,302] [INFO] [timer.py:197:stop] 0/2795, RunningAvgSamplesPerSec=29.972284653062797, CurrSamplesPerSec=30.608173400905674, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:27:27,452] [INFO] [timer.py:197:stop] 0/2796, RunningAvgSamplesPerSec=29.972340596931417, CurrSamplesPerSec=30.129410951290186, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:27:29,589] [INFO] [timer.py:197:stop] 0/2797, RunningAvgSamplesPerSec=29.972462303905232, CurrSamplesPerSec=30.316415266025853, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:27:31,735] [INFO] [timer.py:197:stop] 0/2798, RunningAvgSamplesPerSec=29.97254132109033, CurrSamplesPerSec=30.19503438280964, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:27:33,922] [INFO] [timer.py:197:stop] 0/2799, RunningAvgSamplesPerSec=29.972413097987992, CurrSamplesPerSec=29.61814041403207, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:27:35,841] [INFO] [logging.py:68:log_dist] [Rank 0] step=2800, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:27:35,842] [INFO] [timer.py:197:stop] 0/2800, RunningAvgSamplesPerSec=29.973744215868035, CurrSamplesPerSec=34.2251508372452, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 69.01} +[2022-12-14 21:28:19,614] [INFO] [timer.py:197:stop] 0/2801, RunningAvgSamplesPerSec=29.973395540746505, CurrSamplesPerSec=29.028566435600673, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:28:26,463] [INFO] [timer.py:197:stop] 0/2802, RunningAvgSamplesPerSec=29.973399037771323, CurrSamplesPerSec=29.983190408880287, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:28:33,907] [INFO] [timer.py:197:stop] 0/2803, RunningAvgSamplesPerSec=29.973612824169326, CurrSamplesPerSec=30.584417451779508, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:28:40,045] [INFO] [timer.py:197:stop] 0/2804, RunningAvgSamplesPerSec=29.973855148705685, CurrSamplesPerSec=30.668338197842463, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:28:46,823] [INFO] [timer.py:197:stop] 0/2805, RunningAvgSamplesPerSec=29.973562571698167, CurrSamplesPerSec=29.175594643224933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:28:53,359] [INFO] [timer.py:197:stop] 0/2806, RunningAvgSamplesPerSec=29.97364248195521, CurrSamplesPerSec=30.1993179725659, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:29:00,447] [INFO] [timer.py:197:stop] 0/2807, RunningAvgSamplesPerSec=29.97355562207476, CurrSamplesPerSec=29.731964307747653, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:29:06,897] [INFO] [timer.py:197:stop] 0/2808, RunningAvgSamplesPerSec=29.973485451438307, CurrSamplesPerSec=29.777941364727827, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:29:14,311] [INFO] [timer.py:197:stop] 0/2809, RunningAvgSamplesPerSec=29.973849298696255, CurrSamplesPerSec=31.03081956782691, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:29:20,839] [INFO] [logging.py:68:log_dist] [Rank 0] step=2810, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:29:20,840] [INFO] [timer.py:197:stop] 0/2810, RunningAvgSamplesPerSec=29.973865592392876, CurrSamplesPerSec=30.019671918483425, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:29:27,874] [INFO] [timer.py:197:stop] 0/2811, RunningAvgSamplesPerSec=29.97379975053342, CurrSamplesPerSec=29.790049617630185, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:29:34,864] [INFO] [timer.py:197:stop] 0/2812, RunningAvgSamplesPerSec=29.973712962126772, CurrSamplesPerSec=29.731891859322484, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:29:41,932] [INFO] [timer.py:197:stop] 0/2813, RunningAvgSamplesPerSec=29.973301825376073, CurrSamplesPerSec=28.86089937778787, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:29:48,582] [INFO] [timer.py:197:stop] 0/2814, RunningAvgSamplesPerSec=29.973376400882497, CurrSamplesPerSec=30.184485154883077, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:29:55,182] [INFO] [timer.py:197:stop] 0/2815, RunningAvgSamplesPerSec=29.973594838106234, CurrSamplesPerSec=30.600695994084038, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:30:01,896] [INFO] [timer.py:197:stop] 0/2816, RunningAvgSamplesPerSec=29.973377374680062, CurrSamplesPerSec=29.37389196322008, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:30:08,668] [INFO] [timer.py:197:stop] 0/2817, RunningAvgSamplesPerSec=29.973317636373658, CurrSamplesPerSec=29.806151914459203, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:30:15,396] [INFO] [timer.py:197:stop] 0/2818, RunningAvgSamplesPerSec=29.973261986351922, CurrSamplesPerSec=29.817421960096585, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:30:23,528] [INFO] [timer.py:197:stop] 0/2819, RunningAvgSamplesPerSec=29.973260303537668, CurrSamplesPerSec=29.96852224796302, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:30:30,126] [INFO] [logging.py:68:log_dist] [Rank 0] step=2820, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:30:30,127] [INFO] [timer.py:197:stop] 0/2820, RunningAvgSamplesPerSec=29.973237886818655, CurrSamplesPerSec=29.91022279727046, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:30:36,810] [INFO] [timer.py:197:stop] 0/2821, RunningAvgSamplesPerSec=29.973266651805318, CurrSamplesPerSec=30.054546274920902, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:30:43,131] [INFO] [timer.py:197:stop] 0/2822, RunningAvgSamplesPerSec=29.973210149548212, CurrSamplesPerSec=29.814772534669533, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:30:50,865] [INFO] [timer.py:197:stop] 0/2823, RunningAvgSamplesPerSec=29.97305233590503, CurrSamplesPerSec=29.534531252956914, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:30:57,848] [INFO] [timer.py:197:stop] 0/2824, RunningAvgSamplesPerSec=29.972929444153664, CurrSamplesPerSec=29.630217151753232, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:31:05,192] [INFO] [timer.py:197:stop] 0/2825, RunningAvgSamplesPerSec=29.97298874847258, CurrSamplesPerSec=30.141285569373853, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 70.0} +[2022-12-14 21:31:11,609] [INFO] [timer.py:197:stop] 0/2826, RunningAvgSamplesPerSec=29.972758588384334, CurrSamplesPerSec=29.336807529951777, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:31:18,379] [INFO] [timer.py:197:stop] 0/2827, RunningAvgSamplesPerSec=29.97268297739382, CurrSamplesPerSec=29.760668466705383, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:31:25,823] [INFO] [timer.py:197:stop] 0/2828, RunningAvgSamplesPerSec=29.97219547406591, CurrSamplesPerSec=28.655520102650925, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:31:32,534] [INFO] [timer.py:197:stop] 0/2829, RunningAvgSamplesPerSec=29.971940964755778, CurrSamplesPerSec=29.269558838217417, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:31:39,288] [INFO] [logging.py:68:log_dist] [Rank 0] step=2830, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:31:39,289] [INFO] [timer.py:197:stop] 0/2830, RunningAvgSamplesPerSec=29.97207850445606, CurrSamplesPerSec=30.366015550233282, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:31:45,580] [INFO] [timer.py:197:stop] 0/2831, RunningAvgSamplesPerSec=29.971663009253316, CurrSamplesPerSec=28.84098575009428, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:31:52,125] [INFO] [timer.py:197:stop] 0/2832, RunningAvgSamplesPerSec=29.97161238051963, CurrSamplesPerSec=29.829065140118082, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:31:54,622] [INFO] [timer.py:197:stop] 0/2833, RunningAvgSamplesPerSec=29.971705902635353, CurrSamplesPerSec=30.238732321858926, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:31:56,781] [INFO] [timer.py:197:stop] 0/2834, RunningAvgSamplesPerSec=29.97185752424659, CurrSamplesPerSec=30.407337213947965, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:31:58,956] [INFO] [timer.py:197:stop] 0/2835, RunningAvgSamplesPerSec=29.971926651281688, CurrSamplesPerSec=30.168981976301822, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:32:01,064] [INFO] [timer.py:197:stop] 0/2836, RunningAvgSamplesPerSec=29.97218530334114, CurrSamplesPerSec=30.723316764314387, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:32:03,242] [INFO] [timer.py:197:stop] 0/2837, RunningAvgSamplesPerSec=29.972104128591955, CurrSamplesPerSec=29.74380778375961, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:32:05,417] [INFO] [timer.py:197:stop] 0/2838, RunningAvgSamplesPerSec=29.97207559172474, CurrSamplesPerSec=29.891391436423483, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:32:07,562] [INFO] [timer.py:197:stop] 0/2839, RunningAvgSamplesPerSec=29.97215971869312, CurrSamplesPerSec=30.21265889517536, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:32:09,454] [INFO] [logging.py:68:log_dist] [Rank 0] step=2840, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:32:09,454] [INFO] [timer.py:197:stop] 0/2840, RunningAvgSamplesPerSec=29.97349010450593, CurrSamplesPerSec=34.29174743452661, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:32:47,700] [INFO] [timer.py:197:stop] 0/2841, RunningAvgSamplesPerSec=29.973190901325744, CurrSamplesPerSec=29.147453604772775, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:32:53,944] [INFO] [timer.py:197:stop] 0/2842, RunningAvgSamplesPerSec=29.972767612352023, CurrSamplesPerSec=28.81738983501003, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:33:00,041] [INFO] [timer.py:197:stop] 0/2843, RunningAvgSamplesPerSec=29.972609836721542, CurrSamplesPerSec=29.531129388998686, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:33:05,891] [INFO] [timer.py:197:stop] 0/2844, RunningAvgSamplesPerSec=29.972824830487095, CurrSamplesPerSec=30.596332663585553, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:33:11,637] [INFO] [timer.py:197:stop] 0/2845, RunningAvgSamplesPerSec=29.972681352223862, CurrSamplesPerSec=29.57039103769771, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:33:17,586] [INFO] [timer.py:197:stop] 0/2846, RunningAvgSamplesPerSec=29.972654994449908, CurrSamplesPerSec=29.89790678810029, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:33:23,614] [INFO] [timer.py:197:stop] 0/2847, RunningAvgSamplesPerSec=29.97242185159504, CurrSamplesPerSec=29.32371939508919, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:33:29,520] [INFO] [timer.py:197:stop] 0/2848, RunningAvgSamplesPerSec=29.972217597042185, CurrSamplesPerSec=29.402169427009444, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:33:35,377] [INFO] [timer.py:197:stop] 0/2849, RunningAvgSamplesPerSec=29.972168612640573, CurrSamplesPerSec=29.833404665466006, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:33:41,215] [INFO] [logging.py:68:log_dist] [Rank 0] step=2850, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:33:41,216] [INFO] [timer.py:197:stop] 0/2850, RunningAvgSamplesPerSec=29.972343953826694, CurrSamplesPerSec=30.479998383092965, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 71.0} +[2022-12-14 21:33:47,220] [INFO] [timer.py:197:stop] 0/2851, RunningAvgSamplesPerSec=29.97232914721115, CurrSamplesPerSec=29.930219173102934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:33:53,221] [INFO] [timer.py:197:stop] 0/2852, RunningAvgSamplesPerSec=29.972238778462145, CurrSamplesPerSec=29.716971718028503, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:33:59,348] [INFO] [timer.py:197:stop] 0/2853, RunningAvgSamplesPerSec=29.97176623946422, CurrSamplesPerSec=28.68296093862529, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:34:05,593] [INFO] [timer.py:197:stop] 0/2854, RunningAvgSamplesPerSec=29.97169753936389, CurrSamplesPerSec=29.77710565050222, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:34:14,161] [INFO] [timer.py:197:stop] 0/2855, RunningAvgSamplesPerSec=29.971674547184254, CurrSamplesPerSec=29.906244053521096, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:34:20,391] [INFO] [timer.py:197:stop] 0/2856, RunningAvgSamplesPerSec=29.971617564025927, CurrSamplesPerSec=29.809921994717765, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:34:26,109] [INFO] [timer.py:197:stop] 0/2857, RunningAvgSamplesPerSec=29.97197749307147, CurrSamplesPerSec=31.03568447306195, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:34:31,843] [INFO] [timer.py:197:stop] 0/2858, RunningAvgSamplesPerSec=29.97204295782859, CurrSamplesPerSec=30.160118062839135, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:34:38,234] [INFO] [timer.py:197:stop] 0/2859, RunningAvgSamplesPerSec=29.97162288389038, CurrSamplesPerSec=28.818082826444336, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:34:44,552] [INFO] [logging.py:68:log_dist] [Rank 0] step=2860, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:34:44,552] [INFO] [timer.py:197:stop] 0/2860, RunningAvgSamplesPerSec=29.971485417028763, CurrSamplesPerSec=29.58382423798652, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:34:50,799] [INFO] [timer.py:197:stop] 0/2861, RunningAvgSamplesPerSec=29.971421751968432, CurrSamplesPerSec=29.790565362232726, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:34:56,671] [INFO] [timer.py:197:stop] 0/2862, RunningAvgSamplesPerSec=29.97144559954857, CurrSamplesPerSec=30.039781338381975, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:35:02,876] [INFO] [timer.py:197:stop] 0/2863, RunningAvgSamplesPerSec=29.97165094473771, CurrSamplesPerSec=30.57068012145571, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:35:08,914] [INFO] [timer.py:197:stop] 0/2864, RunningAvgSamplesPerSec=29.971570814220314, CurrSamplesPerSec=29.744058262287776, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:35:14,942] [INFO] [timer.py:197:stop] 0/2865, RunningAvgSamplesPerSec=29.971709988243724, CurrSamplesPerSec=30.375392754343622, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:35:20,681] [INFO] [timer.py:197:stop] 0/2866, RunningAvgSamplesPerSec=29.971762164679728, CurrSamplesPerSec=30.121891817892106, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:35:27,277] [INFO] [timer.py:197:stop] 0/2867, RunningAvgSamplesPerSec=29.971735928900767, CurrSamplesPerSec=29.896784626796187, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:35:33,463] [INFO] [timer.py:197:stop] 0/2868, RunningAvgSamplesPerSec=29.97158177662985, CurrSamplesPerSec=29.536351100776955, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:35:39,230] [INFO] [timer.py:197:stop] 0/2869, RunningAvgSamplesPerSec=29.971583816500427, CurrSamplesPerSec=29.977431226572754, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:35:45,288] [INFO] [logging.py:68:log_dist] [Rank 0] step=2870, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:35:45,288] [INFO] [timer.py:197:stop] 0/2870, RunningAvgSamplesPerSec=29.971554632292545, CurrSamplesPerSec=29.888116522426934, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:35:51,273] [INFO] [timer.py:197:stop] 0/2871, RunningAvgSamplesPerSec=29.971310822216402, CurrSamplesPerSec=29.288010849534714, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:35:57,507] [INFO] [timer.py:197:stop] 0/2872, RunningAvgSamplesPerSec=29.971371474796218, CurrSamplesPerSec=30.146400288846127, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:35:59,930] [INFO] [timer.py:197:stop] 0/2873, RunningAvgSamplesPerSec=29.97151168533928, CurrSamplesPerSec=30.379394174105936, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:36:02,080] [INFO] [timer.py:197:stop] 0/2874, RunningAvgSamplesPerSec=29.971724598520108, CurrSamplesPerSec=30.595729359301316, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:36:04,206] [INFO] [timer.py:197:stop] 0/2875, RunningAvgSamplesPerSec=29.97189470549518, CurrSamplesPerSec=30.46854016628359, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 71.01} +[2022-12-14 21:36:06,353] [INFO] [timer.py:197:stop] 0/2876, RunningAvgSamplesPerSec=29.971966174281036, CurrSamplesPerSec=30.17871285453477, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:36:08,525] [INFO] [timer.py:197:stop] 0/2877, RunningAvgSamplesPerSec=29.971915261929958, CurrSamplesPerSec=29.826304281231565, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:36:10,703] [INFO] [timer.py:197:stop] 0/2878, RunningAvgSamplesPerSec=29.971853341559108, CurrSamplesPerSec=29.79488377090361, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:36:12,826] [INFO] [timer.py:197:stop] 0/2879, RunningAvgSamplesPerSec=29.97203747022088, CurrSamplesPerSec=30.511119459010644, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:36:14,721] [INFO] [logging.py:68:log_dist] [Rank 0] step=2880, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:36:14,722] [INFO] [timer.py:197:stop] 0/2880, RunningAvgSamplesPerSec=29.973308718627603, CurrSamplesPerSec=34.139193714334404, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:36:56,083] [INFO] [timer.py:197:stop] 0/2881, RunningAvgSamplesPerSec=29.973123940956015, CurrSamplesPerSec=29.450607629289085, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:37:02,794] [INFO] [timer.py:197:stop] 0/2882, RunningAvgSamplesPerSec=29.9732548901446, CurrSamplesPerSec=30.355061625979594, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:37:09,176] [INFO] [timer.py:197:stop] 0/2883, RunningAvgSamplesPerSec=29.972960834369847, CurrSamplesPerSec=29.14935900512609, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:37:15,638] [INFO] [timer.py:197:stop] 0/2884, RunningAvgSamplesPerSec=29.972929796014743, CurrSamplesPerSec=29.883774374352154, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:37:21,903] [INFO] [timer.py:197:stop] 0/2885, RunningAvgSamplesPerSec=29.973123165817892, CurrSamplesPerSec=30.540976731623278, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:37:28,680] [INFO] [timer.py:197:stop] 0/2886, RunningAvgSamplesPerSec=29.972771512229013, CurrSamplesPerSec=28.9921353559481, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:37:35,129] [INFO] [timer.py:197:stop] 0/2887, RunningAvgSamplesPerSec=29.972732486793994, CurrSamplesPerSec=29.860604325945292, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:37:41,355] [INFO] [timer.py:197:stop] 0/2888, RunningAvgSamplesPerSec=29.972784593092754, CurrSamplesPerSec=30.123869283715564, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:37:47,883] [INFO] [timer.py:197:stop] 0/2889, RunningAvgSamplesPerSec=29.972770031172058, CurrSamplesPerSec=29.93080319137964, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:37:53,996] [INFO] [logging.py:68:log_dist] [Rank 0] step=2890, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:37:53,996] [INFO] [timer.py:197:stop] 0/2890, RunningAvgSamplesPerSec=29.972685407938904, CurrSamplesPerSec=29.730354060132665, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:38:00,607] [INFO] [timer.py:197:stop] 0/2891, RunningAvgSamplesPerSec=29.971873434664516, CurrSamplesPerSec=27.797105858501805, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:38:06,942] [INFO] [timer.py:197:stop] 0/2892, RunningAvgSamplesPerSec=29.971651201818375, CurrSamplesPerSec=29.343089751541456, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:38:13,440] [INFO] [timer.py:197:stop] 0/2893, RunningAvgSamplesPerSec=29.971746105803746, CurrSamplesPerSec=30.2485525658067, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:38:19,889] [INFO] [timer.py:197:stop] 0/2894, RunningAvgSamplesPerSec=29.971639499483548, CurrSamplesPerSec=29.666578658600134, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:38:26,233] [INFO] [timer.py:197:stop] 0/2895, RunningAvgSamplesPerSec=29.971403461404407, CurrSamplesPerSec=29.3039875122989, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:38:33,102] [INFO] [timer.py:197:stop] 0/2896, RunningAvgSamplesPerSec=29.9713989632528, CurrSamplesPerSec=29.958391460263055, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:38:40,098] [INFO] [timer.py:197:stop] 0/2897, RunningAvgSamplesPerSec=29.97144749931771, CurrSamplesPerSec=30.11257249227598, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:38:46,667] [INFO] [timer.py:197:stop] 0/2898, RunningAvgSamplesPerSec=29.971315175395493, CurrSamplesPerSec=29.593073577801864, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:38:52,822] [INFO] [timer.py:197:stop] 0/2899, RunningAvgSamplesPerSec=29.971498831227038, CurrSamplesPerSec=30.51297840878103, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:38:59,330] [INFO] [logging.py:68:log_dist] [Rank 0] step=2900, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:38:59,331] [INFO] [timer.py:197:stop] 0/2900, RunningAvgSamplesPerSec=29.971296767704153, CurrSamplesPerSec=29.397136698452048, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 72.0} +[2022-12-14 21:39:06,306] [INFO] [timer.py:197:stop] 0/2901, RunningAvgSamplesPerSec=29.97123068791564, CurrSamplesPerSec=29.78094768120933, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:39:12,460] [INFO] [timer.py:197:stop] 0/2902, RunningAvgSamplesPerSec=29.971189075216845, CurrSamplesPerSec=29.851037642479845, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:39:19,497] [INFO] [timer.py:197:stop] 0/2903, RunningAvgSamplesPerSec=29.971210207469987, CurrSamplesPerSec=30.032619351027563, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:39:25,502] [INFO] [timer.py:197:stop] 0/2904, RunningAvgSamplesPerSec=29.97119251376042, CurrSamplesPerSec=29.919950850376758, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:39:32,166] [INFO] [timer.py:197:stop] 0/2905, RunningAvgSamplesPerSec=29.971225310735267, CurrSamplesPerSec=30.066705443561418, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:39:38,624] [INFO] [timer.py:197:stop] 0/2906, RunningAvgSamplesPerSec=29.9711102834771, CurrSamplesPerSec=29.640866838879, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:39:45,672] [INFO] [timer.py:197:stop] 0/2907, RunningAvgSamplesPerSec=29.971173493141528, CurrSamplesPerSec=30.155865911746204, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:39:52,392] [INFO] [timer.py:197:stop] 0/2908, RunningAvgSamplesPerSec=29.97120703658628, CurrSamplesPerSec=30.068968700030524, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:39:58,727] [INFO] [timer.py:197:stop] 0/2909, RunningAvgSamplesPerSec=29.971519035329887, CurrSamplesPerSec=30.906480638219726, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:40:05,645] [INFO] [logging.py:68:log_dist] [Rank 0] step=2910, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:40:05,646] [INFO] [timer.py:197:stop] 0/2910, RunningAvgSamplesPerSec=29.971528023178713, CurrSamplesPerSec=29.99767850423885, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:40:11,959] [INFO] [timer.py:197:stop] 0/2911, RunningAvgSamplesPerSec=29.97113953023312, CurrSamplesPerSec=28.88245328982457, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:40:18,214] [INFO] [timer.py:197:stop] 0/2912, RunningAvgSamplesPerSec=29.971290796780135, CurrSamplesPerSec=30.417884260053516, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:40:20,663] [INFO] [timer.py:197:stop] 0/2913, RunningAvgSamplesPerSec=29.97155145845482, CurrSamplesPerSec=30.749779229938905, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:40:22,835] [INFO] [timer.py:197:stop] 0/2914, RunningAvgSamplesPerSec=29.97149999111939, CurrSamplesPerSec=29.82242403396552, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:40:25,004] [INFO] [timer.py:197:stop] 0/2915, RunningAvgSamplesPerSec=29.971461566885647, CurrSamplesPerSec=29.85998650693758, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:40:27,084] [INFO] [timer.py:197:stop] 0/2916, RunningAvgSamplesPerSec=29.971854857959144, CurrSamplesPerSec=31.163060537394273, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:40:29,253] [INFO] [timer.py:197:stop] 0/2917, RunningAvgSamplesPerSec=29.97181662444371, CurrSamplesPerSec=29.86081691474161, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:40:31,414] [INFO] [timer.py:197:stop] 0/2918, RunningAvgSamplesPerSec=29.971817372069832, CurrSamplesPerSec=29.97399686073904, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:40:33,546] [INFO] [timer.py:197:stop] 0/2919, RunningAvgSamplesPerSec=29.971958704646052, CurrSamplesPerSec=30.38983238892761, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:40:35,456] [INFO] [logging.py:68:log_dist] [Rank 0] step=2920, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:40:35,456] [INFO] [timer.py:197:stop] 0/2920, RunningAvgSamplesPerSec=29.97315697146431, CurrSamplesPerSec=33.930101846380396, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:41:18,440] [INFO] [timer.py:197:stop] 0/2921, RunningAvgSamplesPerSec=29.973209118643528, CurrSamplesPerSec=30.126151297699987, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:41:25,211] [INFO] [timer.py:197:stop] 0/2922, RunningAvgSamplesPerSec=29.9731869581533, CurrSamplesPerSec=29.90863983690134, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:41:32,000] [INFO] [timer.py:197:stop] 0/2923, RunningAvgSamplesPerSec=29.97328813499417, CurrSamplesPerSec=30.271666530589968, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:41:38,839] [INFO] [timer.py:197:stop] 0/2924, RunningAvgSamplesPerSec=29.972858681537076, CurrSamplesPerSec=28.768833621823187, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:41:45,522] [INFO] [timer.py:197:stop] 0/2925, RunningAvgSamplesPerSec=29.97274050119368, CurrSamplesPerSec=29.631352099585282, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 73.0} +[2022-12-14 21:41:52,177] [INFO] [timer.py:197:stop] 0/2926, RunningAvgSamplesPerSec=29.972564446487546, CurrSamplesPerSec=29.46664578075035, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:41:59,008] [INFO] [timer.py:197:stop] 0/2927, RunningAvgSamplesPerSec=29.97277013931907, CurrSamplesPerSec=30.58653627250417, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:42:05,531] [INFO] [timer.py:197:stop] 0/2928, RunningAvgSamplesPerSec=29.97295741894137, CurrSamplesPerSec=30.530951839349246, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:42:12,412] [INFO] [timer.py:197:stop] 0/2929, RunningAvgSamplesPerSec=29.972987155922315, CurrSamplesPerSec=30.060250972181816, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:42:18,970] [INFO] [logging.py:68:log_dist] [Rank 0] step=2930, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:42:18,971] [INFO] [timer.py:197:stop] 0/2930, RunningAvgSamplesPerSec=29.97303321084147, CurrSamplesPerSec=30.108445178237744, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:42:26,142] [INFO] [timer.py:197:stop] 0/2931, RunningAvgSamplesPerSec=29.97292642636666, CurrSamplesPerSec=29.663490491863787, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:42:32,973] [INFO] [timer.py:197:stop] 0/2932, RunningAvgSamplesPerSec=29.972814650963734, CurrSamplesPerSec=29.64896310263029, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:42:39,328] [INFO] [timer.py:197:stop] 0/2933, RunningAvgSamplesPerSec=29.972850142603775, CurrSamplesPerSec=30.077202822088047, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:42:45,894] [INFO] [timer.py:197:stop] 0/2934, RunningAvgSamplesPerSec=29.972841048348098, CurrSamplesPerSec=29.94620947685728, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:42:53,438] [INFO] [timer.py:197:stop] 0/2935, RunningAvgSamplesPerSec=29.972822639087447, CurrSamplesPerSec=29.918943746663263, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:42:59,921] [INFO] [timer.py:197:stop] 0/2936, RunningAvgSamplesPerSec=29.972677237712244, CurrSamplesPerSec=29.552199751767887, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:43:06,870] [INFO] [timer.py:197:stop] 0/2937, RunningAvgSamplesPerSec=29.973053852859568, CurrSamplesPerSec=31.12035354729808, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:43:13,956] [INFO] [timer.py:197:stop] 0/2938, RunningAvgSamplesPerSec=29.9729954443175, CurrSamplesPerSec=29.802541607856796, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:43:20,568] [INFO] [timer.py:197:stop] 0/2939, RunningAvgSamplesPerSec=29.97304528319972, CurrSamplesPerSec=30.120090352824164, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:43:27,256] [INFO] [logging.py:68:log_dist] [Rank 0] step=2940, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:43:27,257] [INFO] [timer.py:197:stop] 0/2940, RunningAvgSamplesPerSec=29.97317020699012, CurrSamplesPerSec=30.34461983744712, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:43:33,891] [INFO] [timer.py:197:stop] 0/2941, RunningAvgSamplesPerSec=29.973135707651036, CurrSamplesPerSec=29.872118372331478, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:43:41,120] [INFO] [timer.py:197:stop] 0/2942, RunningAvgSamplesPerSec=29.97308080985915, CurrSamplesPerSec=29.812600357126435, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:43:48,361] [INFO] [timer.py:197:stop] 0/2943, RunningAvgSamplesPerSec=29.973305599726395, CurrSamplesPerSec=30.64909333939536, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:43:55,395] [INFO] [timer.py:197:stop] 0/2944, RunningAvgSamplesPerSec=29.973397334945663, CurrSamplesPerSec=30.245641946561523, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:44:01,867] [INFO] [timer.py:197:stop] 0/2945, RunningAvgSamplesPerSec=29.97350395487341, CurrSamplesPerSec=30.290498288485658, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:44:08,601] [INFO] [timer.py:197:stop] 0/2946, RunningAvgSamplesPerSec=29.97327775291872, CurrSamplesPerSec=29.32203455370363, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:44:15,088] [INFO] [timer.py:197:stop] 0/2947, RunningAvgSamplesPerSec=29.973498988076155, CurrSamplesPerSec=30.639287618797162, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:44:21,988] [INFO] [timer.py:197:stop] 0/2948, RunningAvgSamplesPerSec=29.97366814052425, CurrSamplesPerSec=30.48024410951136, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:44:28,934] [INFO] [timer.py:197:stop] 0/2949, RunningAvgSamplesPerSec=29.97358323115238, CurrSamplesPerSec=29.72551119686519, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:44:35,374] [INFO] [logging.py:68:log_dist] [Rank 0] step=2950, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:44:35,374] [INFO] [timer.py:197:stop] 0/2950, RunningAvgSamplesPerSec=29.97352210067205, CurrSamplesPerSec=29.79444724393368, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 73.01} +[2022-12-14 21:44:41,928] [INFO] [timer.py:197:stop] 0/2951, RunningAvgSamplesPerSec=29.973227861300003, CurrSamplesPerSec=29.130215046785583, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:44:48,173] [INFO] [timer.py:197:stop] 0/2952, RunningAvgSamplesPerSec=29.973109011112996, CurrSamplesPerSec=29.626672226268433, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:44:50,877] [INFO] [timer.py:197:stop] 0/2953, RunningAvgSamplesPerSec=29.97325444110032, CurrSamplesPerSec=30.408504919319356, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:44:53,040] [INFO] [timer.py:197:stop] 0/2954, RunningAvgSamplesPerSec=29.97337811603285, CurrSamplesPerSec=30.34284308119131, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:44:55,171] [INFO] [timer.py:197:stop] 0/2955, RunningAvgSamplesPerSec=29.973519579208574, CurrSamplesPerSec=30.397021213616295, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:44:57,287] [INFO] [timer.py:197:stop] 0/2956, RunningAvgSamplesPerSec=29.973732164976532, CurrSamplesPerSec=30.614931680923235, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:44:59,481] [INFO] [timer.py:197:stop] 0/2957, RunningAvgSamplesPerSec=29.973576128207462, CurrSamplesPerSec=29.51962669422641, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:45:01,615] [INFO] [timer.py:197:stop] 0/2958, RunningAvgSamplesPerSec=29.973704554466387, CurrSamplesPerSec=30.358072308941026, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:45:03,754] [INFO] [timer.py:197:stop] 0/2959, RunningAvgSamplesPerSec=29.97381185253629, CurrSamplesPerSec=30.29437822343103, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:45:05,637] [INFO] [logging.py:68:log_dist] [Rank 0] step=2960, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:45:05,637] [INFO] [timer.py:197:stop] 0/2960, RunningAvgSamplesPerSec=29.975141319754105, CurrSamplesPerSec=34.500012659472404, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:45:42,793] [INFO] [timer.py:197:stop] 0/2961, RunningAvgSamplesPerSec=29.97502070434814, CurrSamplesPerSec=29.62243839479067, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:45:48,868] [INFO] [timer.py:197:stop] 0/2962, RunningAvgSamplesPerSec=29.975237471015248, CurrSamplesPerSec=30.630679991154334, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:45:54,971] [INFO] [timer.py:197:stop] 0/2963, RunningAvgSamplesPerSec=29.97496154858027, CurrSamplesPerSec=29.17990152975321, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:46:00,738] [INFO] [timer.py:197:stop] 0/2964, RunningAvgSamplesPerSec=29.97485774244099, CurrSamplesPerSec=29.670608666740943, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:46:06,642] [INFO] [timer.py:197:stop] 0/2965, RunningAvgSamplesPerSec=29.97468363977312, CurrSamplesPerSec=29.467716474580975, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:46:12,866] [INFO] [timer.py:197:stop] 0/2966, RunningAvgSamplesPerSec=29.9746762339204, CurrSamplesPerSec=29.952748750162353, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:46:19,033] [INFO] [timer.py:197:stop] 0/2967, RunningAvgSamplesPerSec=29.974657202316276, CurrSamplesPerSec=29.918353522025146, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:46:24,875] [INFO] [timer.py:197:stop] 0/2968, RunningAvgSamplesPerSec=29.974563399997084, CurrSamplesPerSec=29.698997271239502, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:46:31,555] [INFO] [timer.py:197:stop] 0/2969, RunningAvgSamplesPerSec=29.974457093396094, CurrSamplesPerSec=29.662435023924058, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:46:37,832] [INFO] [logging.py:68:log_dist] [Rank 0] step=2970, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:46:37,832] [INFO] [timer.py:197:stop] 0/2970, RunningAvgSamplesPerSec=29.97419030406398, CurrSamplesPerSec=29.202998982490413, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:46:44,367] [INFO] [timer.py:197:stop] 0/2971, RunningAvgSamplesPerSec=29.97435032622201, CurrSamplesPerSec=30.456945422099746, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:46:50,688] [INFO] [timer.py:197:stop] 0/2972, RunningAvgSamplesPerSec=29.97430248561614, CurrSamplesPerSec=29.832933853762583, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:46:56,800] [INFO] [timer.py:197:stop] 0/2973, RunningAvgSamplesPerSec=29.973716747951016, CurrSamplesPerSec=28.329534504013154, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:47:03,330] [INFO] [timer.py:197:stop] 0/2974, RunningAvgSamplesPerSec=29.973618000062668, CurrSamplesPerSec=29.683082722813214, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:47:09,242] [INFO] [timer.py:197:stop] 0/2975, RunningAvgSamplesPerSec=29.973513714162245, CurrSamplesPerSec=29.666749149376333, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 74.0} +[2022-12-14 21:47:15,334] [INFO] [timer.py:197:stop] 0/2976, RunningAvgSamplesPerSec=29.973525523052107, CurrSamplesPerSec=30.008674536307602, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:47:21,248] [INFO] [timer.py:197:stop] 0/2977, RunningAvgSamplesPerSec=29.973355096785575, CurrSamplesPerSec=29.474938411664386, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:47:27,188] [INFO] [timer.py:197:stop] 0/2978, RunningAvgSamplesPerSec=29.97337154715584, CurrSamplesPerSec=30.02239146420912, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:47:33,033] [INFO] [timer.py:197:stop] 0/2979, RunningAvgSamplesPerSec=29.97355808821133, CurrSamplesPerSec=30.539183855961465, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:47:38,843] [INFO] [logging.py:68:log_dist] [Rank 0] step=2980, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:47:38,844] [INFO] [timer.py:197:stop] 0/2980, RunningAvgSamplesPerSec=29.97358790209652, CurrSamplesPerSec=30.06260752626155, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:47:44,859] [INFO] [timer.py:197:stop] 0/2981, RunningAvgSamplesPerSec=29.973713493797263, CurrSamplesPerSec=30.35245307920243, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:47:50,677] [INFO] [timer.py:197:stop] 0/2982, RunningAvgSamplesPerSec=29.97365339218705, CurrSamplesPerSec=29.795674181828836, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:47:57,126] [INFO] [timer.py:197:stop] 0/2983, RunningAvgSamplesPerSec=29.973997068449894, CurrSamplesPerSec=31.034396344529252, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:48:03,411] [INFO] [timer.py:197:stop] 0/2984, RunningAvgSamplesPerSec=29.973822706905118, CurrSamplesPerSec=29.462913517026358, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:48:09,237] [INFO] [timer.py:197:stop] 0/2985, RunningAvgSamplesPerSec=29.97404878289863, CurrSamplesPerSec=30.66372437364136, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:48:14,982] [INFO] [timer.py:197:stop] 0/2986, RunningAvgSamplesPerSec=29.974191927374576, CurrSamplesPerSec=30.407364769383694, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:48:21,817] [INFO] [timer.py:197:stop] 0/2987, RunningAvgSamplesPerSec=29.97419610828066, CurrSamplesPerSec=29.986677128594128, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:48:28,213] [INFO] [timer.py:197:stop] 0/2988, RunningAvgSamplesPerSec=29.974035364182853, CurrSamplesPerSec=29.501776631810298, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:48:34,572] [INFO] [timer.py:197:stop] 0/2989, RunningAvgSamplesPerSec=29.973836635950452, CurrSamplesPerSec=29.391957653141, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:48:40,394] [INFO] [logging.py:68:log_dist] [Rank 0] step=2990, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:48:40,395] [INFO] [timer.py:197:stop] 0/2990, RunningAvgSamplesPerSec=29.973684671140376, CurrSamplesPerSec=29.52653957705629, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:48:46,274] [INFO] [timer.py:197:stop] 0/2991, RunningAvgSamplesPerSec=29.973753616928413, CurrSamplesPerSec=30.181189820568193, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:48:52,328] [INFO] [timer.py:197:stop] 0/2992, RunningAvgSamplesPerSec=29.97343576767414, CurrSamplesPerSec=29.05258190739425, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:48:54,794] [INFO] [timer.py:197:stop] 0/2993, RunningAvgSamplesPerSec=29.97303952439231, CurrSamplesPerSec=28.83333707487052, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:48:56,981] [INFO] [timer.py:197:stop] 0/2994, RunningAvgSamplesPerSec=29.972918688897945, CurrSamplesPerSec=29.615807279958737, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:48:59,169] [INFO] [timer.py:197:stop] 0/2995, RunningAvgSamplesPerSec=29.972792373343946, CurrSamplesPerSec=29.59956396029992, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:49:01,411] [INFO] [timer.py:197:stop] 0/2996, RunningAvgSamplesPerSec=29.972535627630474, CurrSamplesPerSec=29.223310851193958, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:49:03,593] [INFO] [timer.py:197:stop] 0/2997, RunningAvgSamplesPerSec=29.972602749807223, CurrSamplesPerSec=30.17492354410049, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:49:05,775] [INFO] [timer.py:197:stop] 0/2998, RunningAvgSamplesPerSec=29.972512409279336, CurrSamplesPerSec=29.704363983988763, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:49:07,959] [INFO] [timer.py:197:stop] 0/2999, RunningAvgSamplesPerSec=29.972415846194714, CurrSamplesPerSec=29.685879504961555, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +[2022-12-14 21:49:09,880] [INFO] [logging.py:68:log_dist] [Rank 0] step=3000, skipped=6, lr=[1e-05], mom=[[0.9, 0.999]] +[2022-12-14 21:49:09,881] [INFO] [timer.py:197:stop] 0/3000, RunningAvgSamplesPerSec=29.97352191818894, CurrSamplesPerSec=33.70076916490631, MemAllocated=0.53GB, MaxMemAllocated=17.47GB +{'loss': 0.0002, 'learning_rate': 1e-05, 'epoch': 74.01} +{'eval_loss': 0.59375, 'eval_wer': 24.817682037473354, 'eval_runtime': 205.8889, 'eval_samples_per_second': 3.235, 'eval_steps_per_second': 0.102, 'epoch': 74.01} +[2022-12-14 21:52:36,354] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step3000 is begin to save! +[2022-12-14 21:52:36,359] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-3000/global_step3000/mp_rank_00_model_states.pt +[2022-12-14 21:52:36,359] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-3000/global_step3000/mp_rank_00_model_states.pt... +[2022-12-14 21:52:36,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-3000/global_step3000/mp_rank_00_model_states.pt. +[2022-12-14 21:52:36,957] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-3000/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt... +[2022-12-14 21:52:39,199] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-3000/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt. +[2022-12-14 21:52:39,200] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-3000/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt +[2022-12-14 21:52:39,200] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now!