File size: 1,696 Bytes
1131fcc
 
 
 
 
ca8fcf6
1131fcc
 
 
 
 
ca8fcf6
 
1131fcc
ca8fcf6
1131fcc
 
 
ca8fcf6
 
1131fcc
ca8fcf6
1131fcc
 
 
ca8fcf6
 
1131fcc
ca8fcf6
1131fcc
 
 
ca8fcf6
 
1131fcc
ca8fcf6
1131fcc
 
 
ca8fcf6
 
1131fcc
ca8fcf6
1131fcc
 
 
ca8fcf6
 
1131fcc
ca8fcf6
1131fcc
 
 
 
ca8fcf6
1131fcc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca8fcf6
1131fcc
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.5205761790275574,
      "learning_rate": 0.0002,
      "loss": 0.8035,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.3349571228027344,
      "learning_rate": 0.0002,
      "loss": 0.3891,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.42590829730033875,
      "learning_rate": 0.0002,
      "loss": 0.2422,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.2039356529712677,
      "learning_rate": 0.0002,
      "loss": 0.1883,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.3233167231082916,
      "learning_rate": 0.0002,
      "loss": 0.158,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.279433935880661,
      "learning_rate": 0.0002,
      "loss": 0.1395,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0351908666638336e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}