File size: 2,663 Bytes

5468fd0
 
 
 
 
 
 
 
 
 
 
 
 
2c82cf9
5468fd0
2c82cf9
5468fd0
 
 
 
2c82cf9
5468fd0
2c82cf9
5468fd0
 
 
 
2c82cf9
5468fd0
2c82cf9
5468fd0
 
 
 
2c82cf9
5468fd0
2c82cf9
5468fd0
 
 
 
2c82cf9
5468fd0
2c82cf9
5468fd0
 
 
 
2c82cf9
5468fd0
2c82cf9
5468fd0
 
 
 
2c82cf9
5468fd0
2c82cf9
5468fd0
 
 
 
2c82cf9
5468fd0
2c82cf9
5468fd0
 
 
 
2c82cf9
5468fd0
2c82cf9
5468fd0
 
 
 
2c82cf9
5468fd0
2c82cf9
5468fd0
 
 
 
2c82cf9
5468fd0
2c82cf9
5468fd0

{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 58,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1724137931034483,
      "grad_norm": 2108.397216796875,
      "learning_rate": 2.5454545454545454e-08,
      "loss": 1035.4573,
      "step": 5
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 2107.33642578125,
      "learning_rate": 5.727272727272728e-08,
      "loss": 1065.6187,
      "step": 10
    },
    {
      "epoch": 0.5172413793103449,
      "grad_norm": 2219.214111328125,
      "learning_rate": 8.90909090909091e-08,
      "loss": 1036.3791,
      "step": 15
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 1936.712890625,
      "learning_rate": 1.2090909090909092e-07,
      "loss": 1035.5998,
      "step": 20
    },
    {
      "epoch": 0.8620689655172413,
      "grad_norm": 2196.76318359375,
      "learning_rate": 1.5272727272727273e-07,
      "loss": 1064.9285,
      "step": 25
    },
    {
      "epoch": 1.0344827586206897,
      "grad_norm": 2548.19189453125,
      "learning_rate": 1.8454545454545454e-07,
      "loss": 1048.6884,
      "step": 30
    },
    {
      "epoch": 1.206896551724138,
      "grad_norm": 2420.173828125,
      "learning_rate": 2.1636363636363637e-07,
      "loss": 1050.2486,
      "step": 35
    },
    {
      "epoch": 1.3793103448275863,
      "grad_norm": 2168.558349609375,
      "learning_rate": 2.481818181818182e-07,
      "loss": 1027.9692,
      "step": 40
    },
    {
      "epoch": 1.5517241379310345,
      "grad_norm": 2378.92431640625,
      "learning_rate": 2.8e-07,
      "loss": 1043.8671,
      "step": 45
    },
    {
      "epoch": 1.7241379310344827,
      "grad_norm": 2213.712158203125,
      "learning_rate": 3.1181818181818186e-07,
      "loss": 1042.2683,
      "step": 50
    },
    {
      "epoch": 1.896551724137931,
      "grad_norm": 2194.255615234375,
      "learning_rate": 3.436363636363636e-07,
      "loss": 1032.4518,
      "step": 55
    }
  ],
  "logging_steps": 5,
  "max_steps": 58,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.7211980486851994e+17,
  "train_batch_size": 14,
  "trial_name": null,
  "trial_params": null
}