rafflesia-ckpt-40684-ja / trainer_state.json
rmdhirr's picture
Upload folder using huggingface_hub
2819921 verified
{
"best_global_step": 2000,
"best_metric": 1.3034558296203613,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 1000,
"global_step": 2906,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017205781142463867,
"grad_norm": 0.9198477268218994,
"learning_rate": 2.784090909090909e-05,
"loss": 1.2213,
"step": 50
},
{
"epoch": 0.034411562284927734,
"grad_norm": 0.9541558623313904,
"learning_rate": 4.9804826117814056e-05,
"loss": 1.0981,
"step": 100
},
{
"epoch": 0.051617343427391604,
"grad_norm": 1.1445890665054321,
"learning_rate": 4.891767210787793e-05,
"loss": 1.0559,
"step": 150
},
{
"epoch": 0.06882312456985547,
"grad_norm": 0.9405988454818726,
"learning_rate": 4.8030518097941806e-05,
"loss": 1.0414,
"step": 200
},
{
"epoch": 0.08602890571231935,
"grad_norm": 0.9743571281433105,
"learning_rate": 4.714336408800568e-05,
"loss": 1.0525,
"step": 250
},
{
"epoch": 0.10323468685478321,
"grad_norm": 0.9441106915473938,
"learning_rate": 4.6256210078069556e-05,
"loss": 1.006,
"step": 300
},
{
"epoch": 0.12044046799724707,
"grad_norm": 1.0246022939682007,
"learning_rate": 4.536905606813343e-05,
"loss": 1.0293,
"step": 350
},
{
"epoch": 0.13764624913971094,
"grad_norm": 0.8717352151870728,
"learning_rate": 4.448190205819731e-05,
"loss": 1.034,
"step": 400
},
{
"epoch": 0.1548520302821748,
"grad_norm": 1.0397926568984985,
"learning_rate": 4.359474804826118e-05,
"loss": 1.0132,
"step": 450
},
{
"epoch": 0.1720578114246387,
"grad_norm": 1.1297979354858398,
"learning_rate": 4.270759403832506e-05,
"loss": 1.056,
"step": 500
},
{
"epoch": 0.18926359256710254,
"grad_norm": 0.9621482491493225,
"learning_rate": 4.182044002838893e-05,
"loss": 1.0463,
"step": 550
},
{
"epoch": 0.20646937370956642,
"grad_norm": 1.0356558561325073,
"learning_rate": 4.093328601845281e-05,
"loss": 1.0557,
"step": 600
},
{
"epoch": 0.2236751548520303,
"grad_norm": 0.9767435789108276,
"learning_rate": 4.0046132008516676e-05,
"loss": 1.034,
"step": 650
},
{
"epoch": 0.24088093599449414,
"grad_norm": 1.0207983255386353,
"learning_rate": 3.915897799858055e-05,
"loss": 1.0184,
"step": 700
},
{
"epoch": 0.258086717136958,
"grad_norm": 1.1012769937515259,
"learning_rate": 3.8271823988644426e-05,
"loss": 1.0514,
"step": 750
},
{
"epoch": 0.27529249827942187,
"grad_norm": 1.0124127864837646,
"learning_rate": 3.73846699787083e-05,
"loss": 1.0115,
"step": 800
},
{
"epoch": 0.2924982794218858,
"grad_norm": 0.9844532012939453,
"learning_rate": 3.649751596877218e-05,
"loss": 1.0337,
"step": 850
},
{
"epoch": 0.3097040605643496,
"grad_norm": 1.098149061203003,
"learning_rate": 3.561036195883606e-05,
"loss": 1.022,
"step": 900
},
{
"epoch": 0.3269098417068135,
"grad_norm": 0.902259886264801,
"learning_rate": 3.4723207948899934e-05,
"loss": 1.0528,
"step": 950
},
{
"epoch": 0.3441156228492774,
"grad_norm": 1.0517866611480713,
"learning_rate": 3.383605393896381e-05,
"loss": 1.0795,
"step": 1000
},
{
"epoch": 0.3441156228492774,
"eval_loss": 1.3434094190597534,
"eval_runtime": 93.1593,
"eval_samples_per_second": 7.879,
"eval_steps_per_second": 1.578,
"step": 1000
},
{
"epoch": 0.36132140399174123,
"grad_norm": 0.8887484073638916,
"learning_rate": 3.2948899929027684e-05,
"loss": 1.0397,
"step": 1050
},
{
"epoch": 0.3785271851342051,
"grad_norm": 1.0585637092590332,
"learning_rate": 3.206174591909156e-05,
"loss": 1.0981,
"step": 1100
},
{
"epoch": 0.395732966276669,
"grad_norm": 0.8692225217819214,
"learning_rate": 3.1174591909155435e-05,
"loss": 1.1407,
"step": 1150
},
{
"epoch": 0.41293874741913283,
"grad_norm": 0.8615192174911499,
"learning_rate": 3.0287437899219306e-05,
"loss": 1.0712,
"step": 1200
},
{
"epoch": 0.4301445285615967,
"grad_norm": 0.9556344747543335,
"learning_rate": 2.940028388928318e-05,
"loss": 1.092,
"step": 1250
},
{
"epoch": 0.4473503097040606,
"grad_norm": 0.9553106427192688,
"learning_rate": 2.8513129879347057e-05,
"loss": 1.0828,
"step": 1300
},
{
"epoch": 0.46455609084652444,
"grad_norm": 1.069947361946106,
"learning_rate": 2.7625975869410932e-05,
"loss": 1.0358,
"step": 1350
},
{
"epoch": 0.4817618719889883,
"grad_norm": 1.0424370765686035,
"learning_rate": 2.6738821859474804e-05,
"loss": 1.0809,
"step": 1400
},
{
"epoch": 0.4989676531314522,
"grad_norm": 0.9934790134429932,
"learning_rate": 2.585166784953868e-05,
"loss": 1.0502,
"step": 1450
},
{
"epoch": 0.516173434273916,
"grad_norm": 0.9232550859451294,
"learning_rate": 2.4964513839602558e-05,
"loss": 1.0705,
"step": 1500
},
{
"epoch": 0.5333792154163799,
"grad_norm": 0.9275680184364319,
"learning_rate": 2.4077359829666433e-05,
"loss": 1.0581,
"step": 1550
},
{
"epoch": 0.5505849965588437,
"grad_norm": 1.040781021118164,
"learning_rate": 2.3190205819730308e-05,
"loss": 1.0866,
"step": 1600
},
{
"epoch": 0.5677907777013076,
"grad_norm": 0.9358147382736206,
"learning_rate": 2.2303051809794183e-05,
"loss": 1.0942,
"step": 1650
},
{
"epoch": 0.5849965588437716,
"grad_norm": 1.0429332256317139,
"learning_rate": 2.1415897799858055e-05,
"loss": 1.0857,
"step": 1700
},
{
"epoch": 0.6022023399862354,
"grad_norm": 0.9741628170013428,
"learning_rate": 2.052874378992193e-05,
"loss": 1.1424,
"step": 1750
},
{
"epoch": 0.6194081211286993,
"grad_norm": 0.8573130965232849,
"learning_rate": 1.9641589779985805e-05,
"loss": 1.1177,
"step": 1800
},
{
"epoch": 0.6366139022711631,
"grad_norm": 1.0106691122055054,
"learning_rate": 1.875443577004968e-05,
"loss": 1.1009,
"step": 1850
},
{
"epoch": 0.653819683413627,
"grad_norm": 0.8125928044319153,
"learning_rate": 1.786728176011356e-05,
"loss": 1.0727,
"step": 1900
},
{
"epoch": 0.6710254645560908,
"grad_norm": 0.9676728844642639,
"learning_rate": 1.698012775017743e-05,
"loss": 1.1419,
"step": 1950
},
{
"epoch": 0.6882312456985548,
"grad_norm": 1.0101425647735596,
"learning_rate": 1.6092973740241306e-05,
"loss": 1.0981,
"step": 2000
},
{
"epoch": 0.6882312456985548,
"eval_loss": 1.3034558296203613,
"eval_runtime": 88.2407,
"eval_samples_per_second": 8.318,
"eval_steps_per_second": 1.666,
"step": 2000
},
{
"epoch": 0.7054370268410186,
"grad_norm": 1.0116767883300781,
"learning_rate": 1.5205819730305181e-05,
"loss": 1.1167,
"step": 2050
},
{
"epoch": 0.7226428079834825,
"grad_norm": 1.0342180728912354,
"learning_rate": 1.4318665720369056e-05,
"loss": 1.0793,
"step": 2100
},
{
"epoch": 0.7398485891259463,
"grad_norm": 0.8465341329574585,
"learning_rate": 1.3431511710432932e-05,
"loss": 1.125,
"step": 2150
},
{
"epoch": 0.7570543702684102,
"grad_norm": 0.9766820073127747,
"learning_rate": 1.2544357700496807e-05,
"loss": 1.1206,
"step": 2200
},
{
"epoch": 0.774260151410874,
"grad_norm": 0.9202566742897034,
"learning_rate": 1.1657203690560682e-05,
"loss": 1.1365,
"step": 2250
},
{
"epoch": 0.791465932553338,
"grad_norm": 1.019327998161316,
"learning_rate": 1.0770049680624557e-05,
"loss": 1.1084,
"step": 2300
},
{
"epoch": 0.8086717136958018,
"grad_norm": 0.9371334910392761,
"learning_rate": 9.88289567068843e-06,
"loss": 1.1699,
"step": 2350
},
{
"epoch": 0.8258774948382657,
"grad_norm": 0.8369758725166321,
"learning_rate": 8.995741660752308e-06,
"loss": 1.0739,
"step": 2400
},
{
"epoch": 0.8430832759807295,
"grad_norm": 0.8380680084228516,
"learning_rate": 8.108587650816183e-06,
"loss": 1.1862,
"step": 2450
},
{
"epoch": 0.8602890571231934,
"grad_norm": 1.0509248971939087,
"learning_rate": 7.221433640880057e-06,
"loss": 1.1523,
"step": 2500
},
{
"epoch": 0.8774948382656572,
"grad_norm": 1.127092719078064,
"learning_rate": 6.3342796309439315e-06,
"loss": 1.1983,
"step": 2550
},
{
"epoch": 0.8947006194081212,
"grad_norm": 1.072149634361267,
"learning_rate": 5.4471256210078075e-06,
"loss": 1.1421,
"step": 2600
},
{
"epoch": 0.911906400550585,
"grad_norm": 0.9210956692695618,
"learning_rate": 4.559971611071683e-06,
"loss": 1.1709,
"step": 2650
},
{
"epoch": 0.9291121816930489,
"grad_norm": 0.8278102874755859,
"learning_rate": 3.672817601135557e-06,
"loss": 1.1787,
"step": 2700
},
{
"epoch": 0.9463179628355127,
"grad_norm": 1.0187325477600098,
"learning_rate": 2.7856635911994322e-06,
"loss": 1.1812,
"step": 2750
},
{
"epoch": 0.9635237439779766,
"grad_norm": 0.8620675206184387,
"learning_rate": 1.8985095812633074e-06,
"loss": 1.1817,
"step": 2800
},
{
"epoch": 0.9807295251204404,
"grad_norm": 1.0401172637939453,
"learning_rate": 1.0113555713271824e-06,
"loss": 1.2238,
"step": 2850
},
{
"epoch": 0.9979353062629044,
"grad_norm": 0.9923277497291565,
"learning_rate": 1.242015613910575e-07,
"loss": 1.1401,
"step": 2900
}
],
"logging_steps": 50,
"max_steps": 2906,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 3000,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 8,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.578794229561302e+18,
"train_batch_size": 5,
"trial_name": null,
"trial_params": null
}