{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 18.0,
  "eval_steps": 500,
  "global_step": 288,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.64,
      "grad_norm": 0.1718759387731552,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.1135,
      "step": 10
    },
    {
      "epoch": 1.256,
      "grad_norm": 0.13761915266513824,
      "learning_rate": 9.992407582166582e-06,
      "loss": 0.9774,
      "step": 20
    },
    {
      "epoch": 1.896,
      "grad_norm": 0.07171418517827988,
      "learning_rate": 9.931806517013612e-06,
      "loss": 0.9222,
      "step": 30
    },
    {
      "epoch": 2.512,
      "grad_norm": 0.04953434690833092,
      "learning_rate": 9.811340001546252e-06,
      "loss": 0.889,
      "step": 40
    },
    {
      "epoch": 3.128,
      "grad_norm": 0.04060445353388786,
      "learning_rate": 9.632470336074009e-06,
      "loss": 0.8453,
      "step": 50
    },
    {
      "epoch": 3.768,
      "grad_norm": 0.03146224841475487,
      "learning_rate": 9.397368756032445e-06,
      "loss": 0.8146,
      "step": 60
    },
    {
      "epoch": 4.384,
      "grad_norm": 0.03437686711549759,
      "learning_rate": 9.108889076126226e-06,
      "loss": 0.8017,
      "step": 70
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.04327244311571121,
      "learning_rate": 8.770533048884483e-06,
      "loss": 0.7738,
      "step": 80
    },
    {
      "epoch": 5.64,
      "grad_norm": 0.02905658632516861,
      "learning_rate": 8.386407858128707e-06,
      "loss": 0.7444,
      "step": 90
    },
    {
      "epoch": 6.256,
      "grad_norm": 0.034299831837415695,
      "learning_rate": 7.961176263324902e-06,
      "loss": 0.7283,
      "step": 100
    },
    {
      "epoch": 6.896,
      "grad_norm": 0.027953926473855972,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.7085,
      "step": 110
    },
    {
      "epoch": 7.5120000000000005,
      "grad_norm": 0.0314958356320858,
      "learning_rate": 7.008477123264849e-06,
      "loss": 0.657,
      "step": 120
    },
    {
      "epoch": 8.128,
      "grad_norm": 0.0347384437918663,
      "learning_rate": 6.492574055008474e-06,
      "loss": 0.6495,
      "step": 130
    },
    {
      "epoch": 8.768,
      "grad_norm": 0.044140901416540146,
      "learning_rate": 5.958553159618693e-06,
      "loss": 0.5963,
      "step": 140
    },
    {
      "epoch": 9.384,
      "grad_norm": 0.05524936690926552,
      "learning_rate": 5.412896727361663e-06,
      "loss": 0.5829,
      "step": 150
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.04550788924098015,
      "learning_rate": 4.862228288159191e-06,
      "loss": 0.5426,
      "step": 160
    },
    {
      "epoch": 10.64,
      "grad_norm": 0.06263619661331177,
      "learning_rate": 4.313232210907959e-06,
      "loss": 0.4995,
      "step": 170
    },
    {
      "epoch": 11.256,
      "grad_norm": 0.04113304242491722,
      "learning_rate": 3.7725725642960047e-06,
      "loss": 0.4745,
      "step": 180
    },
    {
      "epoch": 11.896,
      "grad_norm": 0.05397722125053406,
      "learning_rate": 3.2468122240362287e-06,
      "loss": 0.4374,
      "step": 190
    },
    {
      "epoch": 12.512,
      "grad_norm": 0.048114072531461716,
      "learning_rate": 2.7423332084455543e-06,
      "loss": 0.3971,
      "step": 200
    },
    {
      "epoch": 13.128,
      "grad_norm": 0.04809720814228058,
      "learning_rate": 2.265259209387867e-06,
      "loss": 0.384,
      "step": 210
    },
    {
      "epoch": 13.768,
      "grad_norm": 0.060608141124248505,
      "learning_rate": 1.8213812589501611e-06,
      "loss": 0.3477,
      "step": 220
    },
    {
      "epoch": 14.384,
      "grad_norm": 0.04567192494869232,
      "learning_rate": 1.4160874341577447e-06,
      "loss": 0.3297,
      "step": 230
    },
    {
      "epoch": 15.0,
      "grad_norm": 0.05597074702382088,
      "learning_rate": 1.0542974530180327e-06,
      "loss": 0.3006,
      "step": 240
    },
    {
      "epoch": 15.64,
      "grad_norm": 0.04579387977719307,
      "learning_rate": 7.404029558083653e-07,
      "loss": 0.2841,
      "step": 250
    },
    {
      "epoch": 16.256,
      "grad_norm": 0.041828740388154984,
      "learning_rate": 4.782141965121129e-07,
      "loss": 0.2889,
      "step": 260
    },
    {
      "epoch": 16.896,
      "grad_norm": 0.042852893471717834,
      "learning_rate": 2.7091379149682683e-07,
      "loss": 0.2701,
      "step": 270
    },
    {
      "epoch": 17.512,
      "grad_norm": 0.04142393916845322,
      "learning_rate": 1.210180868628219e-07,
      "loss": 0.2646,
      "step": 280
    }
  ],
  "logging_steps": 10,
  "max_steps": 300,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1177604318035968.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}