{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.049561381771323784,
  "eval_steps": 200000,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0016520460590441263,
      "grad_norm": 51.92022705078125,
      "learning_rate": 3.2044928972580115e-07,
      "loss": 13.3171,
      "step": 100
    },
    {
      "epoch": 0.0033040921180882525,
      "grad_norm": 68.25243377685547,
      "learning_rate": 6.508093822266271e-07,
      "loss": 12.9799,
      "step": 200
    },
    {
      "epoch": 0.004956138177132379,
      "grad_norm": 69.47785186767578,
      "learning_rate": 9.811694747274531e-07,
      "loss": 12.5133,
      "step": 300
    },
    {
      "epoch": 0.006608184236176505,
      "grad_norm": 73.07315063476562,
      "learning_rate": 1.311529567228279e-06,
      "loss": 11.9388,
      "step": 400
    },
    {
      "epoch": 0.008260230295220631,
      "grad_norm": 82.68733215332031,
      "learning_rate": 1.6418896597291048e-06,
      "loss": 11.0616,
      "step": 500
    },
    {
      "epoch": 0.009912276354264758,
      "grad_norm": 57.61735534667969,
      "learning_rate": 1.972249752229931e-06,
      "loss": 10.2712,
      "step": 600
    },
    {
      "epoch": 0.011564322413308884,
      "grad_norm": 44.42943572998047,
      "learning_rate": 2.302609844730757e-06,
      "loss": 9.5253,
      "step": 700
    },
    {
      "epoch": 0.01321636847235301,
      "grad_norm": 27.03646469116211,
      "learning_rate": 2.6329699372315828e-06,
      "loss": 8.7706,
      "step": 800
    },
    {
      "epoch": 0.014868414531397135,
      "grad_norm": 15.231706619262695,
      "learning_rate": 2.9633300297324087e-06,
      "loss": 8.4333,
      "step": 900
    },
    {
      "epoch": 0.016520460590441263,
      "grad_norm": 14.189949035644531,
      "learning_rate": 3.2936901222332346e-06,
      "loss": 8.0902,
      "step": 1000
    },
    {
      "epoch": 0.018172506649485387,
      "grad_norm": 12.241333961486816,
      "learning_rate": 3.6240502147340605e-06,
      "loss": 7.8862,
      "step": 1100
    },
    {
      "epoch": 0.019824552708529515,
      "grad_norm": 11.400131225585938,
      "learning_rate": 3.9544103072348865e-06,
      "loss": 7.7362,
      "step": 1200
    },
    {
      "epoch": 0.02147659876757364,
      "grad_norm": 12.072014808654785,
      "learning_rate": 4.284770399735712e-06,
      "loss": 7.6007,
      "step": 1300
    },
    {
      "epoch": 0.023128644826617768,
      "grad_norm": 11.08774185180664,
      "learning_rate": 4.615130492236538e-06,
      "loss": 7.5304,
      "step": 1400
    },
    {
      "epoch": 0.024780690885661892,
      "grad_norm": 13.02505874633789,
      "learning_rate": 4.945490584737364e-06,
      "loss": 7.4249,
      "step": 1500
    },
    {
      "epoch": 0.02643273694470602,
      "grad_norm": 13.522186279296875,
      "learning_rate": 5.27585067723819e-06,
      "loss": 7.3035,
      "step": 1600
    },
    {
      "epoch": 0.028084783003750145,
      "grad_norm": 45.22550964355469,
      "learning_rate": 5.606210769739015e-06,
      "loss": 7.2026,
      "step": 1700
    },
    {
      "epoch": 0.02973682906279427,
      "grad_norm": 15.62098503112793,
      "learning_rate": 5.936570862239842e-06,
      "loss": 7.1572,
      "step": 1800
    },
    {
      "epoch": 0.0313888751218384,
      "grad_norm": 16.570518493652344,
      "learning_rate": 6.266930954740668e-06,
      "loss": 7.0523,
      "step": 1900
    },
    {
      "epoch": 0.033040921180882525,
      "grad_norm": 16.82353401184082,
      "learning_rate": 6.597291047241494e-06,
      "loss": 7.1158,
      "step": 2000
    },
    {
      "epoch": 0.034692967239926646,
      "grad_norm": 17.38075828552246,
      "learning_rate": 6.924347538817311e-06,
      "loss": 6.9856,
      "step": 2100
    },
    {
      "epoch": 0.036345013298970774,
      "grad_norm": 93.04572296142578,
      "learning_rate": 7.2547076313181375e-06,
      "loss": 7.0865,
      "step": 2200
    },
    {
      "epoch": 0.0379970593580149,
      "grad_norm": 17.861074447631836,
      "learning_rate": 7.585067723818963e-06,
      "loss": 6.9496,
      "step": 2300
    },
    {
      "epoch": 0.03964910541705903,
      "grad_norm": 19.067747116088867,
      "learning_rate": 7.91542781631979e-06,
      "loss": 6.9294,
      "step": 2400
    },
    {
      "epoch": 0.04130115147610315,
      "grad_norm": 16.43912696838379,
      "learning_rate": 8.245787908820615e-06,
      "loss": 6.8825,
      "step": 2500
    },
    {
      "epoch": 0.04295319753514728,
      "grad_norm": 140.5387725830078,
      "learning_rate": 8.576148001321441e-06,
      "loss": 6.8218,
      "step": 2600
    },
    {
      "epoch": 0.04460524359419141,
      "grad_norm": 22.34341049194336,
      "learning_rate": 8.903204492897258e-06,
      "loss": 6.8416,
      "step": 2700
    },
    {
      "epoch": 0.046257289653235535,
      "grad_norm": 16.260499954223633,
      "learning_rate": 9.233564585398084e-06,
      "loss": 6.7184,
      "step": 2800
    },
    {
      "epoch": 0.047909335712279656,
      "grad_norm": 20.075071334838867,
      "learning_rate": 9.56392467789891e-06,
      "loss": 6.9183,
      "step": 2900
    },
    {
      "epoch": 0.049561381771323784,
      "grad_norm": 45.1911735534668,
      "learning_rate": 9.894284770399738e-06,
      "loss": 6.7166,
      "step": 3000
    }
  ],
  "logging_steps": 100,
  "max_steps": 60531,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": null
}