{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.99728014505893,
"eval_steps": 500,
"global_step": 880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11332728921124206,
"grad_norm": 12.256142616271973,
"learning_rate": 3.2000000000000003e-06,
"loss": 6.768,
"num_input_tokens_seen": 320000,
"step": 25
},
{
"epoch": 0.22665457842248413,
"grad_norm": 14.015169143676758,
"learning_rate": 6.533333333333334e-06,
"loss": 2.2714,
"num_input_tokens_seen": 640000,
"step": 50
},
{
"epoch": 0.3399818676337262,
"grad_norm": 2.729668140411377,
"learning_rate": 9.866666666666668e-06,
"loss": 1.6663,
"num_input_tokens_seen": 960000,
"step": 75
},
{
"epoch": 0.45330915684496825,
"grad_norm": 2.5849289894104004,
"learning_rate": 1.3200000000000002e-05,
"loss": 1.5351,
"num_input_tokens_seen": 1280000,
"step": 100
},
{
"epoch": 0.5666364460562103,
"grad_norm": 2.5873868465423584,
"learning_rate": 1.6533333333333333e-05,
"loss": 1.4698,
"num_input_tokens_seen": 1600000,
"step": 125
},
{
"epoch": 0.6799637352674524,
"grad_norm": 1.8172498941421509,
"learning_rate": 1.9866666666666667e-05,
"loss": 1.4164,
"num_input_tokens_seen": 1920000,
"step": 150
},
{
"epoch": 0.7932910244786945,
"grad_norm": 3.378293037414551,
"learning_rate": 1.994670819911521e-05,
"loss": 1.3952,
"num_input_tokens_seen": 2239840,
"step": 175
},
{
"epoch": 0.9066183136899365,
"grad_norm": 1.4730700254440308,
"learning_rate": 1.977848341505657e-05,
"loss": 1.3792,
"num_input_tokens_seen": 2559800,
"step": 200
},
{
"epoch": 1.0226654578422485,
"grad_norm": 1.2745240926742554,
"learning_rate": 1.949717842791432e-05,
"loss": 1.3537,
"num_input_tokens_seen": 2884920,
"step": 225
},
{
"epoch": 1.1359927470534905,
"grad_norm": 1.7677239179611206,
"learning_rate": 1.9106046300942165e-05,
"loss": 1.2956,
"num_input_tokens_seen": 3204920,
"step": 250
},
{
"epoch": 1.2493200362647325,
"grad_norm": 1.9639768600463867,
"learning_rate": 1.8609610158889943e-05,
"loss": 1.2642,
"num_input_tokens_seen": 3524920,
"step": 275
},
{
"epoch": 1.3626473254759746,
"grad_norm": 1.738120436668396,
"learning_rate": 1.8013610881746767e-05,
"loss": 1.2527,
"num_input_tokens_seen": 3844920,
"step": 300
},
{
"epoch": 1.4759746146872166,
"grad_norm": 1.5475760698318481,
"learning_rate": 1.732494071613579e-05,
"loss": 1.2602,
"num_input_tokens_seen": 4164920,
"step": 325
},
{
"epoch": 1.5893019038984586,
"grad_norm": 1.3410508632659912,
"learning_rate": 1.6551563572090855e-05,
"loss": 1.2551,
"num_input_tokens_seen": 4484840,
"step": 350
},
{
"epoch": 1.7026291931097008,
"grad_norm": 1.6581236124038696,
"learning_rate": 1.5702422926917872e-05,
"loss": 1.2325,
"num_input_tokens_seen": 4804840,
"step": 375
},
{
"epoch": 1.8159564823209429,
"grad_norm": 1.8297406435012817,
"learning_rate": 1.4787338401157888e-05,
"loss": 1.2436,
"num_input_tokens_seen": 5124840,
"step": 400
},
{
"epoch": 1.929283771532185,
"grad_norm": 1.9106981754302979,
"learning_rate": 1.3816892202666591e-05,
"loss": 1.2319,
"num_input_tokens_seen": 5444840,
"step": 425
},
{
"epoch": 2.045330915684497,
"grad_norm": 1.4120745658874512,
"learning_rate": 1.2802306751992163e-05,
"loss": 1.2132,
"num_input_tokens_seen": 5769800,
"step": 450
},
{
"epoch": 2.158658204895739,
"grad_norm": 1.541704535484314,
"learning_rate": 1.1755314904214284e-05,
"loss": 1.0794,
"num_input_tokens_seen": 6089800,
"step": 475
},
{
"epoch": 2.271985494106981,
"grad_norm": 2.239482879638672,
"learning_rate": 1.06880242680232e-05,
"loss": 1.0779,
"num_input_tokens_seen": 6409680,
"step": 500
},
{
"epoch": 2.385312783318223,
"grad_norm": 2.1483075618743896,
"learning_rate": 9.612777191078257e-06,
"loss": 1.0722,
"num_input_tokens_seen": 6729680,
"step": 525
},
{
"epoch": 2.498640072529465,
"grad_norm": 1.631958246231079,
"learning_rate": 8.542008030801254e-06,
"loss": 1.0663,
"num_input_tokens_seen": 7049680,
"step": 550
},
{
"epoch": 2.611967361740707,
"grad_norm": 1.948183298110962,
"learning_rate": 7.4880993611518095e-06,
"loss": 1.056,
"num_input_tokens_seen": 7369680,
"step": 575
},
{
"epoch": 2.725294650951949,
"grad_norm": 1.8783904314041138,
"learning_rate": 6.463238778236287e-06,
"loss": 1.0578,
"num_input_tokens_seen": 7689680,
"step": 600
},
{
"epoch": 2.838621940163191,
"grad_norm": 2.3182311058044434,
"learning_rate": 5.479277960676959e-06,
"loss": 1.0531,
"num_input_tokens_seen": 8009600,
"step": 625
},
{
"epoch": 2.951949229374433,
"grad_norm": 2.483482837677002,
"learning_rate": 4.547595614593489e-06,
"loss": 1.0523,
"num_input_tokens_seen": 8329600,
"step": 650
},
{
"epoch": 3.067996373526745,
"grad_norm": 1.3066755533218384,
"learning_rate": 3.6789658881265135e-06,
"loss": 1.0127,
"num_input_tokens_seen": 8654720,
"step": 675
},
{
"epoch": 3.1813236627379875,
"grad_norm": 1.4423686265945435,
"learning_rate": 2.883433777182255e-06,
"loss": 0.9245,
"num_input_tokens_seen": 8974520,
"step": 700
},
{
"epoch": 3.2946509519492295,
"grad_norm": 2.5524277687072754,
"learning_rate": 2.170198963229372e-06,
"loss": 0.918,
"num_input_tokens_seen": 9294520,
"step": 725
},
{
"epoch": 3.4079782411604715,
"grad_norm": 1.343592882156372,
"learning_rate": 1.547509426469368e-06,
"loss": 0.9192,
"num_input_tokens_seen": 9614520,
"step": 750
},
{
"epoch": 3.5213055303717136,
"grad_norm": 1.9703131914138794,
"learning_rate": 1.022566064657663e-06,
"loss": 0.9177,
"num_input_tokens_seen": 9934520,
"step": 775
},
{
"epoch": 3.6346328195829556,
"grad_norm": 2.417506694793701,
"learning_rate": 6.01439420581047e-07,
"loss": 0.9152,
"num_input_tokens_seen": 10254520,
"step": 800
},
{
"epoch": 3.7479601087941976,
"grad_norm": 1.23152494430542,
"learning_rate": 2.889994811704966e-07,
"loss": 0.917,
"num_input_tokens_seen": 10574520,
"step": 825
},
{
"epoch": 3.8612873980054396,
"grad_norm": 1.5819039344787598,
"learning_rate": 8.885936006545304e-08,
"loss": 0.912,
"num_input_tokens_seen": 10894520,
"step": 850
},
{
"epoch": 3.9746146872166817,
"grad_norm": 1.4086824655532837,
"learning_rate": 3.333514894887646e-09,
"loss": 0.9239,
"num_input_tokens_seen": 11214520,
"step": 875
},
{
"epoch": 3.99728014505893,
"num_input_tokens_seen": 11278520,
"step": 880,
"total_flos": 7.651349314204147e+17,
"train_loss": 1.351207665421746,
"train_runtime": 8552.2085,
"train_samples_per_second": 2.578,
"train_steps_per_second": 0.103
}
],
"logging_steps": 25,
"max_steps": 880,
"num_input_tokens_seen": 11278520,
"num_train_epochs": 4,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.651349314204147e+17,
"train_batch_size": 5,
"trial_name": null,
"trial_params": null
}
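
A minimal sketch (not part of the original upload) of how a Hugging Face Trainer state file like the one above could be loaded and its logged loss curve summarized; the local file name "trainer_state.json" is an assumption, and the field names follow the structure shown above.

# Sketch: read a trainer_state.json and print the per-step training loss.
# Assumes the JSON above is saved locally as "trainer_state.json".
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry is a dict; the final entry carries aggregate stats
# (train_loss, train_runtime, ...) rather than a per-step loss, so filter on keys.
records = [(e["step"], e["loss"], e["learning_rate"])
           for e in state["log_history"]
           if "loss" in e and "learning_rate" in e]

print(f"epochs: {state['epoch']:.2f}, global steps: {state['global_step']}")
for step, loss, lr in records:
    print(f"step {step:4d}  loss {loss:.4f}  lr {lr:.2e}")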