rafflesia-ckpt-46496-ja / trainer_state.json
{
"best_global_step": 2000,
"best_metric": 1.3367302417755127,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 1000,
"global_step": 5812,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017205781142463867,
"grad_norm": 0.9484581351280212,
"learning_rate": 1.4000000000000001e-05,
"loss": 1.1583,
"step": 50
},
{
"epoch": 0.034411562284927734,
"grad_norm": 1.0050328969955444,
"learning_rate": 2.8285714285714287e-05,
"loss": 0.9511,
"step": 100
},
{
"epoch": 0.051617343427391604,
"grad_norm": 1.2752665281295776,
"learning_rate": 4.257142857142857e-05,
"loss": 0.8969,
"step": 150
},
{
"epoch": 0.06882312456985547,
"grad_norm": 1.1626160144805908,
"learning_rate": 4.978712080894093e-05,
"loss": 0.8897,
"step": 200
},
{
"epoch": 0.08602890571231935,
"grad_norm": 1.154097080230713,
"learning_rate": 4.934362249423453e-05,
"loss": 0.909,
"step": 250
},
{
"epoch": 0.10323468685478321,
"grad_norm": 1.112544298171997,
"learning_rate": 4.890012417952812e-05,
"loss": 0.8691,
"step": 300
},
{
"epoch": 0.12044046799724707,
"grad_norm": 1.2551391124725342,
"learning_rate": 4.845662586482171e-05,
"loss": 0.8942,
"step": 350
},
{
"epoch": 0.13764624913971094,
"grad_norm": 1.0854212045669556,
"learning_rate": 4.801312755011531e-05,
"loss": 0.8984,
"step": 400
},
{
"epoch": 0.1548520302821748,
"grad_norm": 1.198875904083252,
"learning_rate": 4.7569629235408906e-05,
"loss": 0.8765,
"step": 450
},
{
"epoch": 0.1720578114246387,
"grad_norm": 1.3993616104125977,
"learning_rate": 4.7126130920702504e-05,
"loss": 0.9302,
"step": 500
},
{
"epoch": 0.18926359256710254,
"grad_norm": 1.1944853067398071,
"learning_rate": 4.66826326059961e-05,
"loss": 0.9117,
"step": 550
},
{
"epoch": 0.20646937370956642,
"grad_norm": 1.3922518491744995,
"learning_rate": 4.623913429128969e-05,
"loss": 0.9291,
"step": 600
},
{
"epoch": 0.2236751548520303,
"grad_norm": 1.1352382898330688,
"learning_rate": 4.579563597658329e-05,
"loss": 0.9108,
"step": 650
},
{
"epoch": 0.24088093599449414,
"grad_norm": 1.2245187759399414,
"learning_rate": 4.535213766187689e-05,
"loss": 0.9009,
"step": 700
},
{
"epoch": 0.258086717136958,
"grad_norm": 1.303236961364746,
"learning_rate": 4.4908639347170486e-05,
"loss": 0.9326,
"step": 750
},
{
"epoch": 0.27529249827942187,
"grad_norm": 1.1667600870132446,
"learning_rate": 4.4465141032464084e-05,
"loss": 0.8955,
"step": 800
},
{
"epoch": 0.2924982794218858,
"grad_norm": 1.166150689125061,
"learning_rate": 4.4021642717757675e-05,
"loss": 0.918,
"step": 850
},
{
"epoch": 0.3097040605643496,
"grad_norm": 1.330957055091858,
"learning_rate": 4.3578144403051266e-05,
"loss": 0.9135,
"step": 900
},
{
"epoch": 0.3269098417068135,
"grad_norm": 1.064183235168457,
"learning_rate": 4.313464608834486e-05,
"loss": 0.946,
"step": 950
},
{
"epoch": 0.3441156228492774,
"grad_norm": 1.189034342765808,
"learning_rate": 4.269114777363846e-05,
"loss": 0.9749,
"step": 1000
},
{
"epoch": 0.3441156228492774,
"eval_loss": 1.4126514196395874,
"eval_runtime": 93.1504,
"eval_samples_per_second": 7.88,
"eval_steps_per_second": 1.578,
"step": 1000
},
{
"epoch": 0.36132140399174123,
"grad_norm": 1.0131909847259521,
"learning_rate": 4.224764945893206e-05,
"loss": 0.9431,
"step": 1050
},
{
"epoch": 0.3785271851342051,
"grad_norm": 1.244815707206726,
"learning_rate": 4.1804151144225656e-05,
"loss": 1.0062,
"step": 1100
},
{
"epoch": 0.395732966276669,
"grad_norm": 0.9802664518356323,
"learning_rate": 4.136065282951925e-05,
"loss": 1.0486,
"step": 1150
},
{
"epoch": 0.41293874741913283,
"grad_norm": 0.9956826567649841,
"learning_rate": 4.0917154514812845e-05,
"loss": 0.9844,
"step": 1200
},
{
"epoch": 0.4301445285615967,
"grad_norm": 1.0612707138061523,
"learning_rate": 4.047365620010644e-05,
"loss": 1.0091,
"step": 1250
},
{
"epoch": 0.4473503097040606,
"grad_norm": 1.0374691486358643,
"learning_rate": 4.003015788540004e-05,
"loss": 1.0007,
"step": 1300
},
{
"epoch": 0.46455609084652444,
"grad_norm": 1.2885727882385254,
"learning_rate": 3.958665957069364e-05,
"loss": 0.9555,
"step": 1350
},
{
"epoch": 0.4817618719889883,
"grad_norm": 1.1126559972763062,
"learning_rate": 3.914316125598723e-05,
"loss": 1.0084,
"step": 1400
},
{
"epoch": 0.4989676531314522,
"grad_norm": 1.1720352172851562,
"learning_rate": 3.869966294128082e-05,
"loss": 0.9789,
"step": 1450
},
{
"epoch": 0.516173434273916,
"grad_norm": 1.0672069787979126,
"learning_rate": 3.825616462657442e-05,
"loss": 1.0046,
"step": 1500
},
{
"epoch": 0.5333792154163799,
"grad_norm": 1.0774304866790771,
"learning_rate": 3.7812666311868016e-05,
"loss": 0.9957,
"step": 1550
},
{
"epoch": 0.5505849965588437,
"grad_norm": 1.1217703819274902,
"learning_rate": 3.736916799716161e-05,
"loss": 1.0262,
"step": 1600
},
{
"epoch": 0.5677907777013076,
"grad_norm": 1.037427544593811,
"learning_rate": 3.692566968245521e-05,
"loss": 1.0392,
"step": 1650
},
{
"epoch": 0.5849965588437716,
"grad_norm": 1.1435151100158691,
"learning_rate": 3.64821713677488e-05,
"loss": 1.0341,
"step": 1700
},
{
"epoch": 0.6022023399862354,
"grad_norm": 1.0763335227966309,
"learning_rate": 3.60386730530424e-05,
"loss": 1.0967,
"step": 1750
},
{
"epoch": 0.6194081211286993,
"grad_norm": 0.9355671405792236,
"learning_rate": 3.5595174738336e-05,
"loss": 1.0745,
"step": 1800
},
{
"epoch": 0.6366139022711631,
"grad_norm": 1.1394997835159302,
"learning_rate": 3.5151676423629595e-05,
"loss": 1.0604,
"step": 1850
},
{
"epoch": 0.653819683413627,
"grad_norm": 0.8916899561882019,
"learning_rate": 3.4708178108923186e-05,
"loss": 1.0343,
"step": 1900
},
{
"epoch": 0.6710254645560908,
"grad_norm": 1.0358752012252808,
"learning_rate": 3.4264679794216784e-05,
"loss": 1.1059,
"step": 1950
},
{
"epoch": 0.6882312456985548,
"grad_norm": 1.090041160583496,
"learning_rate": 3.3821181479510375e-05,
"loss": 1.0659,
"step": 2000
},
{
"epoch": 0.6882312456985548,
"eval_loss": 1.3367302417755127,
"eval_runtime": 88.2266,
"eval_samples_per_second": 8.319,
"eval_steps_per_second": 1.666,
"step": 2000
},
{
"epoch": 0.7054370268410186,
"grad_norm": 1.0861823558807373,
"learning_rate": 3.337768316480397e-05,
"loss": 1.0877,
"step": 2050
},
{
"epoch": 0.7226428079834825,
"grad_norm": 1.1247456073760986,
"learning_rate": 3.293418485009757e-05,
"loss": 1.0547,
"step": 2100
},
{
"epoch": 0.7398485891259463,
"grad_norm": 0.9415215253829956,
"learning_rate": 3.249068653539117e-05,
"loss": 1.1066,
"step": 2150
},
{
"epoch": 0.7570543702684102,
"grad_norm": 1.0296528339385986,
"learning_rate": 3.2047188220684766e-05,
"loss": 1.1037,
"step": 2200
},
{
"epoch": 0.774260151410874,
"grad_norm": 1.0104950666427612,
"learning_rate": 3.1603689905978357e-05,
"loss": 1.1236,
"step": 2250
},
{
"epoch": 0.791465932553338,
"grad_norm": 1.1287598609924316,
"learning_rate": 3.1160191591271954e-05,
"loss": 1.0993,
"step": 2300
},
{
"epoch": 0.8086717136958018,
"grad_norm": 0.9809098839759827,
"learning_rate": 3.071669327656555e-05,
"loss": 1.1639,
"step": 2350
},
{
"epoch": 0.8258774948382657,
"grad_norm": 0.9065077304840088,
"learning_rate": 3.0273194961859146e-05,
"loss": 1.0698,
"step": 2400
},
{
"epoch": 0.8430832759807295,
"grad_norm": 0.8680762648582458,
"learning_rate": 2.9829696647152744e-05,
"loss": 1.1864,
"step": 2450
},
{
"epoch": 0.8602890571231934,
"grad_norm": 1.0787030458450317,
"learning_rate": 2.9386198332446342e-05,
"loss": 1.1552,
"step": 2500
},
{
"epoch": 0.8774948382656572,
"grad_norm": 1.146647572517395,
"learning_rate": 2.8942700017739933e-05,
"loss": 1.2046,
"step": 2550
},
{
"epoch": 0.8947006194081212,
"grad_norm": 1.0998808145523071,
"learning_rate": 2.849920170303353e-05,
"loss": 1.1504,
"step": 2600
},
{
"epoch": 0.911906400550585,
"grad_norm": 0.9578316807746887,
"learning_rate": 2.8055703388327125e-05,
"loss": 1.1814,
"step": 2650
},
{
"epoch": 0.9291121816930489,
"grad_norm": 0.8427146077156067,
"learning_rate": 2.7612205073620722e-05,
"loss": 1.1914,
"step": 2700
},
{
"epoch": 0.9463179628355127,
"grad_norm": 1.0207325220108032,
"learning_rate": 2.716870675891432e-05,
"loss": 1.1949,
"step": 2750
},
{
"epoch": 0.9635237439779766,
"grad_norm": 0.8344902992248535,
"learning_rate": 2.672520844420791e-05,
"loss": 1.1986,
"step": 2800
},
{
"epoch": 0.9807295251204404,
"grad_norm": 1.0717881917953491,
"learning_rate": 2.628171012950151e-05,
"loss": 1.2419,
"step": 2850
},
{
"epoch": 0.9979353062629044,
"grad_norm": 1.0195544958114624,
"learning_rate": 2.5838211814795103e-05,
"loss": 1.1612,
"step": 2900
},
{
"epoch": 1.0151410874053681,
"grad_norm": 0.899029552936554,
"learning_rate": 2.53947135000887e-05,
"loss": 0.9398,
"step": 2950
},
{
"epoch": 1.032346868547832,
"grad_norm": 1.1363555192947388,
"learning_rate": 2.4951215185382295e-05,
"loss": 0.8889,
"step": 3000
},
{
"epoch": 1.032346868547832,
"eval_loss": 1.3666342496871948,
"eval_runtime": 88.3327,
"eval_samples_per_second": 8.309,
"eval_steps_per_second": 1.664,
"step": 3000
},
{
"epoch": 1.049552649690296,
"grad_norm": 1.237770438194275,
"learning_rate": 2.4507716870675893e-05,
"loss": 0.8746,
"step": 3050
},
{
"epoch": 1.0667584308327598,
"grad_norm": 1.1103781461715698,
"learning_rate": 2.406421855596949e-05,
"loss": 0.8944,
"step": 3100
},
{
"epoch": 1.0839642119752237,
"grad_norm": 1.2465671300888062,
"learning_rate": 2.3620720241263085e-05,
"loss": 0.9278,
"step": 3150
},
{
"epoch": 1.1011699931176875,
"grad_norm": 1.304677963256836,
"learning_rate": 2.317722192655668e-05,
"loss": 0.8646,
"step": 3200
},
{
"epoch": 1.1183757742601514,
"grad_norm": 1.1867053508758545,
"learning_rate": 2.2733723611850277e-05,
"loss": 0.8941,
"step": 3250
},
{
"epoch": 1.1355815554026152,
"grad_norm": 1.0053008794784546,
"learning_rate": 2.229022529714387e-05,
"loss": 0.9093,
"step": 3300
},
{
"epoch": 1.1527873365450791,
"grad_norm": 1.0691931247711182,
"learning_rate": 2.184672698243747e-05,
"loss": 0.8739,
"step": 3350
},
{
"epoch": 1.169993117687543,
"grad_norm": 1.2377207279205322,
"learning_rate": 2.1403228667731063e-05,
"loss": 0.9417,
"step": 3400
},
{
"epoch": 1.1871988988300068,
"grad_norm": 1.21890127658844,
"learning_rate": 2.0959730353024658e-05,
"loss": 0.8924,
"step": 3450
},
{
"epoch": 1.2044046799724708,
"grad_norm": 1.099365472793579,
"learning_rate": 2.0516232038318255e-05,
"loss": 0.9046,
"step": 3500
},
{
"epoch": 1.2216104611149345,
"grad_norm": 1.188915729522705,
"learning_rate": 2.007273372361185e-05,
"loss": 0.9054,
"step": 3550
},
{
"epoch": 1.2388162422573985,
"grad_norm": 1.258689522743225,
"learning_rate": 1.9629235408905447e-05,
"loss": 0.9272,
"step": 3600
},
{
"epoch": 1.2560220233998622,
"grad_norm": 1.1899304389953613,
"learning_rate": 1.9185737094199045e-05,
"loss": 0.9213,
"step": 3650
},
{
"epoch": 1.2732278045423262,
"grad_norm": 1.2941292524337769,
"learning_rate": 1.874223877949264e-05,
"loss": 0.9083,
"step": 3700
},
{
"epoch": 1.2904335856847902,
"grad_norm": 1.148829460144043,
"learning_rate": 1.8298740464786234e-05,
"loss": 0.9208,
"step": 3750
},
{
"epoch": 1.307639366827254,
"grad_norm": 1.3640356063842773,
"learning_rate": 1.785524215007983e-05,
"loss": 0.9632,
"step": 3800
},
{
"epoch": 1.3248451479697179,
"grad_norm": 1.049970030784607,
"learning_rate": 1.7411743835373426e-05,
"loss": 0.9223,
"step": 3850
},
{
"epoch": 1.3420509291121818,
"grad_norm": 1.2103326320648193,
"learning_rate": 1.6968245520667024e-05,
"loss": 0.9937,
"step": 3900
},
{
"epoch": 1.3592567102546456,
"grad_norm": 1.09535551071167,
"learning_rate": 1.6524747205960618e-05,
"loss": 0.9386,
"step": 3950
},
{
"epoch": 1.3764624913971093,
"grad_norm": 1.1930817365646362,
"learning_rate": 1.6081248891254212e-05,
"loss": 0.9157,
"step": 4000
},
{
"epoch": 1.3764624913971093,
"eval_loss": 1.3664780855178833,
"eval_runtime": 88.454,
"eval_samples_per_second": 8.298,
"eval_steps_per_second": 1.662,
"step": 4000
},
{
"epoch": 1.3936682725395733,
"grad_norm": 1.2864257097244263,
"learning_rate": 1.563775057654781e-05,
"loss": 0.946,
"step": 4050
},
{
"epoch": 1.4108740536820372,
"grad_norm": 1.079478144645691,
"learning_rate": 1.5194252261841404e-05,
"loss": 0.8982,
"step": 4100
},
{
"epoch": 1.428079834824501,
"grad_norm": 1.1406025886535645,
"learning_rate": 1.4750753947135002e-05,
"loss": 0.9076,
"step": 4150
},
{
"epoch": 1.445285615966965,
"grad_norm": 1.044668436050415,
"learning_rate": 1.4307255632428598e-05,
"loss": 0.9332,
"step": 4200
},
{
"epoch": 1.4624913971094289,
"grad_norm": 1.1297789812088013,
"learning_rate": 1.3863757317722192e-05,
"loss": 0.9203,
"step": 4250
},
{
"epoch": 1.4796971782518926,
"grad_norm": 1.0997716188430786,
"learning_rate": 1.342025900301579e-05,
"loss": 0.9333,
"step": 4300
},
{
"epoch": 1.4969029593943566,
"grad_norm": 1.374189853668213,
"learning_rate": 1.2976760688309386e-05,
"loss": 0.9235,
"step": 4350
},
{
"epoch": 1.5141087405368203,
"grad_norm": 1.198410153388977,
"learning_rate": 1.253326237360298e-05,
"loss": 0.8935,
"step": 4400
},
{
"epoch": 1.5313145216792843,
"grad_norm": 1.1307648420333862,
"learning_rate": 1.2089764058896576e-05,
"loss": 0.941,
"step": 4450
},
{
"epoch": 1.548520302821748,
"grad_norm": 1.1348686218261719,
"learning_rate": 1.1646265744190172e-05,
"loss": 0.998,
"step": 4500
},
{
"epoch": 1.565726083964212,
"grad_norm": 1.3584885597229004,
"learning_rate": 1.1202767429483768e-05,
"loss": 0.9083,
"step": 4550
},
{
"epoch": 1.582931865106676,
"grad_norm": 1.2940605878829956,
"learning_rate": 1.0759269114777365e-05,
"loss": 0.9553,
"step": 4600
},
{
"epoch": 1.6001376462491397,
"grad_norm": 1.2229249477386475,
"learning_rate": 1.031577080007096e-05,
"loss": 0.939,
"step": 4650
},
{
"epoch": 1.6173434273916034,
"grad_norm": 1.356358528137207,
"learning_rate": 9.872272485364557e-06,
"loss": 0.9297,
"step": 4700
},
{
"epoch": 1.6345492085340676,
"grad_norm": 0.916768491268158,
"learning_rate": 9.428774170658151e-06,
"loss": 0.9648,
"step": 4750
},
{
"epoch": 1.6517549896765313,
"grad_norm": 1.5708458423614502,
"learning_rate": 8.985275855951749e-06,
"loss": 0.9427,
"step": 4800
},
{
"epoch": 1.668960770818995,
"grad_norm": 0.9711500406265259,
"learning_rate": 8.541777541245345e-06,
"loss": 0.9696,
"step": 4850
},
{
"epoch": 1.686166551961459,
"grad_norm": 1.191889762878418,
"learning_rate": 8.098279226538939e-06,
"loss": 0.932,
"step": 4900
},
{
"epoch": 1.703372333103923,
"grad_norm": 1.3063703775405884,
"learning_rate": 7.654780911832535e-06,
"loss": 0.933,
"step": 4950
},
{
"epoch": 1.7205781142463867,
"grad_norm": 1.094841718673706,
"learning_rate": 7.211282597126132e-06,
"loss": 0.9449,
"step": 5000
},
{
"epoch": 1.7205781142463867,
"eval_loss": 1.3542050123214722,
"eval_runtime": 88.544,
"eval_samples_per_second": 8.29,
"eval_steps_per_second": 1.66,
"step": 5000
},
{
"epoch": 1.7377838953888507,
"grad_norm": 1.335047960281372,
"learning_rate": 6.767784282419727e-06,
"loss": 0.9855,
"step": 5050
},
{
"epoch": 1.7549896765313147,
"grad_norm": 1.2741748094558716,
"learning_rate": 6.324285967713322e-06,
"loss": 0.9373,
"step": 5100
},
{
"epoch": 1.7721954576737784,
"grad_norm": 1.4646645784378052,
"learning_rate": 5.880787653006919e-06,
"loss": 0.9481,
"step": 5150
},
{
"epoch": 1.7894012388162421,
"grad_norm": 1.2502957582473755,
"learning_rate": 5.437289338300515e-06,
"loss": 0.9703,
"step": 5200
},
{
"epoch": 1.806607019958706,
"grad_norm": 1.335482120513916,
"learning_rate": 4.99379102359411e-06,
"loss": 0.9671,
"step": 5250
},
{
"epoch": 1.82381280110117,
"grad_norm": 1.1999086141586304,
"learning_rate": 4.550292708887706e-06,
"loss": 0.9621,
"step": 5300
},
{
"epoch": 1.8410185822436338,
"grad_norm": 1.1440843343734741,
"learning_rate": 4.106794394181302e-06,
"loss": 0.9651,
"step": 5350
},
{
"epoch": 1.8582243633860978,
"grad_norm": 1.1589412689208984,
"learning_rate": 3.6632960794748983e-06,
"loss": 0.9231,
"step": 5400
},
{
"epoch": 1.8754301445285617,
"grad_norm": 1.1917223930358887,
"learning_rate": 3.219797764768494e-06,
"loss": 1.0207,
"step": 5450
},
{
"epoch": 1.8926359256710255,
"grad_norm": 1.1760448217391968,
"learning_rate": 2.77629945006209e-06,
"loss": 0.9558,
"step": 5500
},
{
"epoch": 1.9098417068134892,
"grad_norm": 1.25649893283844,
"learning_rate": 2.3328011353556856e-06,
"loss": 0.9408,
"step": 5550
},
{
"epoch": 1.9270474879559532,
"grad_norm": 1.1310392618179321,
"learning_rate": 1.8893028206492816e-06,
"loss": 0.9771,
"step": 5600
},
{
"epoch": 1.9442532690984171,
"grad_norm": 0.9325273036956787,
"learning_rate": 1.4458045059428774e-06,
"loss": 0.9757,
"step": 5650
},
{
"epoch": 1.9614590502408809,
"grad_norm": 1.2769732475280762,
"learning_rate": 1.0023061912364732e-06,
"loss": 0.9719,
"step": 5700
},
{
"epoch": 1.9786648313833448,
"grad_norm": 1.5237048864364624,
"learning_rate": 5.588078765300692e-07,
"loss": 0.9612,
"step": 5750
},
{
"epoch": 1.9958706125258088,
"grad_norm": 1.3760077953338623,
"learning_rate": 1.1530956182366508e-07,
"loss": 0.962,
"step": 5800
}
],
"logging_steps": 50,
"max_steps": 5812,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 3000,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 8,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.144678634728463e+18,
"train_batch_size": 5,
"trial_name": null,
"trial_params": null
}
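
# For quick inspection, the log_history above can be parsed with Python's standard
# json module. The sketch below is illustrative only and assumes this file has been
# downloaded locally as "trainer_state.json" (the filename is an assumption, not part
# of the record above); it splits the per-50-step training-loss entries from the
# per-1000-step evaluation entries and checks that the lowest eval_loss matches the
# reported best_metric / best_global_step.

import json

with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

best_eval = min(eval_logs, key=lambda e: e["eval_loss"])
print(f"logged train points: {len(train_logs)} (every {state['logging_steps']} steps)")
print(f"eval points        : {len(eval_logs)} (every {state['eval_steps']} steps)")
print(f"lowest eval_loss   : {best_eval['eval_loss']:.4f} at step {best_eval['step']}")
print(f"recorded best      : metric {state['best_metric']} at step {state['best_global_step']}")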