gerqwen-audio / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.09423752273112121,
"eval_steps": 100000,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000235593806827803,
"grad_norm": 103.0,
"learning_rate": 1e-05,
"loss": 0.7800231,
"memory(GiB)": 63.62,
"step": 1,
"train_speed(iter/s)": 0.015931
},
{
"epoch": 0.001177969034139015,
"grad_norm": 5.8125,
"learning_rate": 9.99997807127629e-06,
"loss": 0.41946995,
"memory(GiB)": 75.24,
"step": 5,
"train_speed(iter/s)": 0.017972
},
{
"epoch": 0.00235593806827803,
"grad_norm": 2.703125,
"learning_rate": 9.999888986165874e-06,
"loss": 0.0869894,
"memory(GiB)": 75.24,
"step": 10,
"train_speed(iter/s)": 0.018238
},
{
"epoch": 0.003533907102417045,
"grad_norm": 2.140625,
"learning_rate": 9.99973137534353e-06,
"loss": 0.06987351,
"memory(GiB)": 75.24,
"step": 15,
"train_speed(iter/s)": 0.018317
},
{
"epoch": 0.00471187613655606,
"grad_norm": 2.515625,
"learning_rate": 9.999505240969388e-06,
"loss": 0.0606461,
"memory(GiB)": 75.24,
"step": 20,
"train_speed(iter/s)": 0.01837
},
{
"epoch": 0.005889845170695076,
"grad_norm": 2.4375,
"learning_rate": 9.999210586142718e-06,
"loss": 0.06591458,
"memory(GiB)": 75.24,
"step": 25,
"train_speed(iter/s)": 0.018407
},
{
"epoch": 0.00706781420483409,
"grad_norm": 2.8125,
"learning_rate": 9.998847414901898e-06,
"loss": 0.06059705,
"memory(GiB)": 75.24,
"step": 30,
"train_speed(iter/s)": 0.018432
},
{
"epoch": 0.008245783238973105,
"grad_norm": 1.9921875,
"learning_rate": 9.998415732224352e-06,
"loss": 0.06047676,
"memory(GiB)": 75.24,
"step": 35,
"train_speed(iter/s)": 0.018453
},
{
"epoch": 0.00942375227311212,
"grad_norm": 1.921875,
"learning_rate": 9.997915544026483e-06,
"loss": 0.06190881,
"memory(GiB)": 75.24,
"step": 40,
"train_speed(iter/s)": 0.018469
},
{
"epoch": 0.010601721307251136,
"grad_norm": 1.859375,
"learning_rate": 9.997346857163591e-06,
"loss": 0.05765554,
"memory(GiB)": 75.24,
"step": 45,
"train_speed(iter/s)": 0.018482
},
{
"epoch": 0.011779690341390151,
"grad_norm": 2.5625,
"learning_rate": 9.99670967942979e-06,
"loss": 0.0662235,
"memory(GiB)": 75.24,
"step": 50,
"train_speed(iter/s)": 0.01849
},
{
"epoch": 0.012957659375529167,
"grad_norm": 2.390625,
"learning_rate": 9.996004019557879e-06,
"loss": 0.06362078,
"memory(GiB)": 75.24,
"step": 55,
"train_speed(iter/s)": 0.0185
},
{
"epoch": 0.01413562840966818,
"grad_norm": 2.875,
"learning_rate": 9.995229887219246e-06,
"loss": 0.06171583,
"memory(GiB)": 75.24,
"step": 60,
"train_speed(iter/s)": 0.018512
},
{
"epoch": 0.015313597443807196,
"grad_norm": 2.109375,
"learning_rate": 9.99438729302372e-06,
"loss": 0.06211852,
"memory(GiB)": 75.24,
"step": 65,
"train_speed(iter/s)": 0.018519
},
{
"epoch": 0.01649156647794621,
"grad_norm": 1.828125,
"learning_rate": 9.993476248519429e-06,
"loss": 0.06484153,
"memory(GiB)": 75.24,
"step": 70,
"train_speed(iter/s)": 0.01852
},
{
"epoch": 0.017669535512085225,
"grad_norm": 1.90625,
"learning_rate": 9.992496766192645e-06,
"loss": 0.06099743,
"memory(GiB)": 75.24,
"step": 75,
"train_speed(iter/s)": 0.018526
},
{
"epoch": 0.01884750454622424,
"grad_norm": 1.796875,
"learning_rate": 9.991448859467611e-06,
"loss": 0.05843818,
"memory(GiB)": 75.24,
"step": 80,
"train_speed(iter/s)": 0.018543
},
{
"epoch": 0.020025473580363256,
"grad_norm": 1.8203125,
"learning_rate": 9.99033254270636e-06,
"loss": 0.05953899,
"memory(GiB)": 75.24,
"step": 85,
"train_speed(iter/s)": 0.018546
},
{
"epoch": 0.02120344261450227,
"grad_norm": 1.9609375,
"learning_rate": 9.989147831208508e-06,
"loss": 0.06501681,
"memory(GiB)": 75.24,
"step": 90,
"train_speed(iter/s)": 0.018554
},
{
"epoch": 0.022381411648641287,
"grad_norm": 2.609375,
"learning_rate": 9.987894741211056e-06,
"loss": 0.06521546,
"memory(GiB)": 75.24,
"step": 95,
"train_speed(iter/s)": 0.01856
},
{
"epoch": 0.023559380682780302,
"grad_norm": 2.046875,
"learning_rate": 9.986573289888164e-06,
"loss": 0.06153967,
"memory(GiB)": 75.24,
"step": 100,
"train_speed(iter/s)": 0.018562
},
{
"epoch": 0.024737349716919318,
"grad_norm": 2.109375,
"learning_rate": 9.98518349535091e-06,
"loss": 0.07089446,
"memory(GiB)": 75.24,
"step": 105,
"train_speed(iter/s)": 0.018452
},
{
"epoch": 0.025915318751058333,
"grad_norm": 1.7578125,
"learning_rate": 9.98372537664705e-06,
"loss": 0.05478874,
"memory(GiB)": 75.24,
"step": 110,
"train_speed(iter/s)": 0.018463
},
{
"epoch": 0.027093287785197345,
"grad_norm": 2.9375,
"learning_rate": 9.982198953760752e-06,
"loss": 0.06532571,
"memory(GiB)": 75.24,
"step": 115,
"train_speed(iter/s)": 0.018473
},
{
"epoch": 0.02827125681933636,
"grad_norm": 2.234375,
"learning_rate": 9.980604247612325e-06,
"loss": 0.06488043,
"memory(GiB)": 75.24,
"step": 120,
"train_speed(iter/s)": 0.018478
},
{
"epoch": 0.029449225853475376,
"grad_norm": 2.28125,
"learning_rate": 9.978941280057928e-06,
"loss": 0.06263313,
"memory(GiB)": 75.24,
"step": 125,
"train_speed(iter/s)": 0.018482
},
{
"epoch": 0.03062719488761439,
"grad_norm": 2.21875,
"learning_rate": 9.977210073889273e-06,
"loss": 0.0654664,
"memory(GiB)": 75.24,
"step": 130,
"train_speed(iter/s)": 0.018487
},
{
"epoch": 0.03180516392175341,
"grad_norm": 2.171875,
"learning_rate": 9.975410652833316e-06,
"loss": 0.06672717,
"memory(GiB)": 75.24,
"step": 135,
"train_speed(iter/s)": 0.018489
},
{
"epoch": 0.03298313295589242,
"grad_norm": 2.875,
"learning_rate": 9.973543041551924e-06,
"loss": 0.06413687,
"memory(GiB)": 75.24,
"step": 140,
"train_speed(iter/s)": 0.01849
},
{
"epoch": 0.03416110199003144,
"grad_norm": 1.9453125,
"learning_rate": 9.971607265641547e-06,
"loss": 0.0582508,
"memory(GiB)": 75.24,
"step": 145,
"train_speed(iter/s)": 0.018495
},
{
"epoch": 0.03533907102417045,
"grad_norm": 1.9375,
"learning_rate": 9.969603351632855e-06,
"loss": 0.06022533,
"memory(GiB)": 75.24,
"step": 150,
"train_speed(iter/s)": 0.0185
},
{
"epoch": 0.03651704005830947,
"grad_norm": 2.109375,
"learning_rate": 9.967531326990387e-06,
"loss": 0.06132371,
"memory(GiB)": 75.24,
"step": 155,
"train_speed(iter/s)": 0.018504
},
{
"epoch": 0.03769500909244848,
"grad_norm": 2.078125,
"learning_rate": 9.965391220112165e-06,
"loss": 0.07101279,
"memory(GiB)": 75.24,
"step": 160,
"train_speed(iter/s)": 0.018506
},
{
"epoch": 0.0388729781265875,
"grad_norm": 2.140625,
"learning_rate": 9.96318306032931e-06,
"loss": 0.0588982,
"memory(GiB)": 75.24,
"step": 165,
"train_speed(iter/s)": 0.018505
},
{
"epoch": 0.04005094716072651,
"grad_norm": 2.125,
"learning_rate": 9.96090687790564e-06,
"loss": 0.06118761,
"memory(GiB)": 75.24,
"step": 170,
"train_speed(iter/s)": 0.018511
},
{
"epoch": 0.04122891619486553,
"grad_norm": 1.8671875,
"learning_rate": 9.95856270403725e-06,
"loss": 0.06012461,
"memory(GiB)": 75.24,
"step": 175,
"train_speed(iter/s)": 0.018517
},
{
"epoch": 0.04240688522900454,
"grad_norm": 2.234375,
"learning_rate": 9.956150570852088e-06,
"loss": 0.0591939,
"memory(GiB)": 75.24,
"step": 180,
"train_speed(iter/s)": 0.01852
},
{
"epoch": 0.043584854263143555,
"grad_norm": 2.234375,
"learning_rate": 9.95367051140952e-06,
"loss": 0.06429687,
"memory(GiB)": 75.24,
"step": 185,
"train_speed(iter/s)": 0.018524
},
{
"epoch": 0.044762823297282574,
"grad_norm": 1.59375,
"learning_rate": 9.951122559699868e-06,
"loss": 0.05647093,
"memory(GiB)": 75.24,
"step": 190,
"train_speed(iter/s)": 0.018525
},
{
"epoch": 0.045940792331421586,
"grad_norm": 1.9140625,
"learning_rate": 9.948506750643946e-06,
"loss": 0.05816346,
"memory(GiB)": 75.24,
"step": 195,
"train_speed(iter/s)": 0.018525
},
{
"epoch": 0.047118761365560605,
"grad_norm": 2.546875,
"learning_rate": 9.94582312009259e-06,
"loss": 0.05947306,
"memory(GiB)": 75.24,
"step": 200,
"train_speed(iter/s)": 0.018527
},
{
"epoch": 0.04829673039969962,
"grad_norm": 2.359375,
"learning_rate": 9.943071704826153e-06,
"loss": 0.06321282,
"memory(GiB)": 75.24,
"step": 205,
"train_speed(iter/s)": 0.018454
},
{
"epoch": 0.049474699433838636,
"grad_norm": 2.203125,
"learning_rate": 9.940252542554007e-06,
"loss": 0.06456767,
"memory(GiB)": 75.24,
"step": 210,
"train_speed(iter/s)": 0.018455
},
{
"epoch": 0.05065266846797765,
"grad_norm": 2.15625,
"learning_rate": 9.937365671914037e-06,
"loss": 0.06057892,
"memory(GiB)": 75.24,
"step": 215,
"train_speed(iter/s)": 0.018456
},
{
"epoch": 0.05183063750211667,
"grad_norm": 2.0,
"learning_rate": 9.934411132472088e-06,
"loss": 0.05920454,
"memory(GiB)": 75.24,
"step": 220,
"train_speed(iter/s)": 0.018458
},
{
"epoch": 0.05300860653625568,
"grad_norm": 2.015625,
"learning_rate": 9.931388964721446e-06,
"loss": 0.05975649,
"memory(GiB)": 75.24,
"step": 225,
"train_speed(iter/s)": 0.018461
},
{
"epoch": 0.05418657557039469,
"grad_norm": 2.0,
"learning_rate": 9.92829921008227e-06,
"loss": 0.06393375,
"memory(GiB)": 75.24,
"step": 230,
"train_speed(iter/s)": 0.018462
},
{
"epoch": 0.05536454460453371,
"grad_norm": 2.28125,
"learning_rate": 9.925141910901029e-06,
"loss": 0.06334119,
"memory(GiB)": 75.24,
"step": 235,
"train_speed(iter/s)": 0.018466
},
{
"epoch": 0.05654251363867272,
"grad_norm": 2.09375,
"learning_rate": 9.921917110449914e-06,
"loss": 0.06911048,
"memory(GiB)": 75.24,
"step": 240,
"train_speed(iter/s)": 0.018468
},
{
"epoch": 0.05772048267281174,
"grad_norm": 1.984375,
"learning_rate": 9.918624852926258e-06,
"loss": 0.05916922,
"memory(GiB)": 75.24,
"step": 245,
"train_speed(iter/s)": 0.01847
},
{
"epoch": 0.05889845170695075,
"grad_norm": 1.859375,
"learning_rate": 9.915265183451923e-06,
"loss": 0.06251335,
"memory(GiB)": 75.24,
"step": 250,
"train_speed(iter/s)": 0.018471
},
{
"epoch": 0.06007642074108977,
"grad_norm": 1.8515625,
"learning_rate": 9.911838148072678e-06,
"loss": 0.06203491,
"memory(GiB)": 75.24,
"step": 255,
"train_speed(iter/s)": 0.018477
},
{
"epoch": 0.06125438977522878,
"grad_norm": 2.265625,
"learning_rate": 9.908343793757574e-06,
"loss": 0.06085759,
"memory(GiB)": 75.24,
"step": 260,
"train_speed(iter/s)": 0.01848
},
{
"epoch": 0.062432358809367795,
"grad_norm": 2.375,
"learning_rate": 9.904782168398296e-06,
"loss": 0.06250409,
"memory(GiB)": 75.24,
"step": 265,
"train_speed(iter/s)": 0.018484
},
{
"epoch": 0.06361032784350681,
"grad_norm": 1.9609375,
"learning_rate": 9.901153320808514e-06,
"loss": 0.05536562,
"memory(GiB)": 75.24,
"step": 270,
"train_speed(iter/s)": 0.018489
},
{
"epoch": 0.06478829687764583,
"grad_norm": 1.8359375,
"learning_rate": 9.897457300723202e-06,
"loss": 0.05569639,
"memory(GiB)": 75.24,
"step": 275,
"train_speed(iter/s)": 0.018491
},
{
"epoch": 0.06596626591178484,
"grad_norm": 2.40625,
"learning_rate": 9.893694158797968e-06,
"loss": 0.05840618,
"memory(GiB)": 75.24,
"step": 280,
"train_speed(iter/s)": 0.018494
},
{
"epoch": 0.06714423494592386,
"grad_norm": 2.265625,
"learning_rate": 9.889863946608352e-06,
"loss": 0.05661937,
"memory(GiB)": 75.24,
"step": 285,
"train_speed(iter/s)": 0.018496
},
{
"epoch": 0.06832220398006288,
"grad_norm": 2.140625,
"learning_rate": 9.885966716649125e-06,
"loss": 0.06150655,
"memory(GiB)": 75.24,
"step": 290,
"train_speed(iter/s)": 0.018497
},
{
"epoch": 0.06950017301420189,
"grad_norm": 2.09375,
"learning_rate": 9.88200252233356e-06,
"loss": 0.06209329,
"memory(GiB)": 75.24,
"step": 295,
"train_speed(iter/s)": 0.018497
},
{
"epoch": 0.0706781420483409,
"grad_norm": 3.375,
"learning_rate": 9.877971417992716e-06,
"loss": 0.05904433,
"memory(GiB)": 75.24,
"step": 300,
"train_speed(iter/s)": 0.018499
},
{
"epoch": 0.07185611108247993,
"grad_norm": 1.796875,
"learning_rate": 9.873873458874676e-06,
"loss": 0.05126434,
"memory(GiB)": 75.24,
"step": 305,
"train_speed(iter/s)": 0.018458
},
{
"epoch": 0.07303408011661894,
"grad_norm": 2.0,
"learning_rate": 9.8697087011438e-06,
"loss": 0.05796698,
"memory(GiB)": 75.24,
"step": 310,
"train_speed(iter/s)": 0.018459
},
{
"epoch": 0.07421204915075795,
"grad_norm": 1.875,
"learning_rate": 9.865477201879953e-06,
"loss": 0.05630487,
"memory(GiB)": 75.24,
"step": 315,
"train_speed(iter/s)": 0.01846
},
{
"epoch": 0.07539001818489696,
"grad_norm": 2.515625,
"learning_rate": 9.861179019077725e-06,
"loss": 0.0567848,
"memory(GiB)": 75.24,
"step": 320,
"train_speed(iter/s)": 0.018461
},
{
"epoch": 0.07656798721903597,
"grad_norm": 2.109375,
"learning_rate": 9.856814211645627e-06,
"loss": 0.05985626,
"memory(GiB)": 75.24,
"step": 325,
"train_speed(iter/s)": 0.018463
},
{
"epoch": 0.077745956253175,
"grad_norm": 2.09375,
"learning_rate": 9.852382839405298e-06,
"loss": 0.05782009,
"memory(GiB)": 75.24,
"step": 330,
"train_speed(iter/s)": 0.018466
},
{
"epoch": 0.07892392528731401,
"grad_norm": 2.28125,
"learning_rate": 9.847884963090675e-06,
"loss": 0.06585214,
"memory(GiB)": 75.24,
"step": 335,
"train_speed(iter/s)": 0.018468
},
{
"epoch": 0.08010189432145302,
"grad_norm": 2.234375,
"learning_rate": 9.843320644347156e-06,
"loss": 0.06263242,
"memory(GiB)": 75.24,
"step": 340,
"train_speed(iter/s)": 0.01847
},
{
"epoch": 0.08127986335559204,
"grad_norm": 2.203125,
"learning_rate": 9.838689945730776e-06,
"loss": 0.05163463,
"memory(GiB)": 75.24,
"step": 345,
"train_speed(iter/s)": 0.018472
},
{
"epoch": 0.08245783238973106,
"grad_norm": 2.015625,
"learning_rate": 9.833992930707321e-06,
"loss": 0.05960041,
"memory(GiB)": 75.24,
"step": 350,
"train_speed(iter/s)": 0.018475
},
{
"epoch": 0.08363580142387007,
"grad_norm": 2.5,
"learning_rate": 9.829229663651483e-06,
"loss": 0.05999585,
"memory(GiB)": 75.24,
"step": 355,
"train_speed(iter/s)": 0.018477
},
{
"epoch": 0.08481377045800909,
"grad_norm": 1.671875,
"learning_rate": 9.824400209845967e-06,
"loss": 0.05059795,
"memory(GiB)": 75.24,
"step": 360,
"train_speed(iter/s)": 0.018479
},
{
"epoch": 0.0859917394921481,
"grad_norm": 2.171875,
"learning_rate": 9.81950463548059e-06,
"loss": 0.05671123,
"memory(GiB)": 75.24,
"step": 365,
"train_speed(iter/s)": 0.018481
},
{
"epoch": 0.08716970852628711,
"grad_norm": 2.625,
"learning_rate": 9.814543007651389e-06,
"loss": 0.05803382,
"memory(GiB)": 75.24,
"step": 370,
"train_speed(iter/s)": 0.018483
},
{
"epoch": 0.08834767756042614,
"grad_norm": 1.890625,
"learning_rate": 9.80951539435969e-06,
"loss": 0.05704566,
"memory(GiB)": 75.24,
"step": 375,
"train_speed(iter/s)": 0.018485
},
{
"epoch": 0.08952564659456515,
"grad_norm": 2.03125,
"learning_rate": 9.804421864511175e-06,
"loss": 0.05998203,
"memory(GiB)": 75.24,
"step": 380,
"train_speed(iter/s)": 0.018487
},
{
"epoch": 0.09070361562870416,
"grad_norm": 2.53125,
"learning_rate": 9.79926248791495e-06,
"loss": 0.06044774,
"memory(GiB)": 75.24,
"step": 385,
"train_speed(iter/s)": 0.018488
},
{
"epoch": 0.09188158466284317,
"grad_norm": 2.1875,
"learning_rate": 9.794037335282572e-06,
"loss": 0.06596763,
"memory(GiB)": 75.24,
"step": 390,
"train_speed(iter/s)": 0.018489
},
{
"epoch": 0.0930595536969822,
"grad_norm": 2.171875,
"learning_rate": 9.788746478227097e-06,
"loss": 0.06313769,
"memory(GiB)": 75.24,
"step": 395,
"train_speed(iter/s)": 0.018489
},
{
"epoch": 0.09423752273112121,
"grad_norm": 1.9296875,
"learning_rate": 9.783389989262078e-06,
"loss": 0.05841722,
"memory(GiB)": 75.24,
"step": 400,
"train_speed(iter/s)": 0.018489
}
],
"logging_steps": 5,
"max_steps": 4244,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.4341415068565504e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
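
For reference, a minimal sketch of reading the state above, assuming it is saved locally as trainer_state.json and that matplotlib is available (neither the path nor the plotting library is part of the original upload; any JSON-capable tooling works the same way):

import json

import matplotlib.pyplot as plt  # assumed dependency, used only for the loss plot

# Path is an assumption; point it at the checkpoint's trainer_state.json.
with open("trainer_state.json") as f:
    state = json.load(f)

history = state["log_history"]

# Each entry logs step, epoch, loss, learning_rate, grad_norm,
# memory(GiB) and train_speed(iter/s), as shown above.
steps = [entry["step"] for entry in history]
losses = [entry["loss"] for entry in history]
lrs = [entry["learning_rate"] for entry in history]

print(f"logged {len(history)} entries, steps {steps[0]}..{steps[-1]} of {state['max_steps']}")
print(f"latest loss {losses[-1]:.4f} at learning rate {lrs[-1]:.3e}")

# Plot the training loss over logged steps.
plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("train loss")
plt.title("gerqwen-audio training loss (first 400 steps)")
plt.savefig("loss_curve.png")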