File size: 4,006 Bytes
d4518f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
[
    {
        "loss": 2.0908,
        "grad_norm": 0.5866689682006836,
        "learning_rate": 8.923190911336132e-05,
        "epoch": 0.22189421894218941,
        "step": 451
    },
    {
        "eval_loss": 1.9633848667144775,
        "eval_runtime": 1103.6079,
        "eval_samples_per_second": 26.193,
        "eval_steps_per_second": 1.637,
        "epoch": 0.22189421894218941,
        "step": 451
    },
    {
        "loss": 1.9265,
        "grad_norm": 0.5757979154586792,
        "learning_rate": 7.809335638429242e-05,
        "epoch": 0.44378843788437883,
        "step": 902
    },
    {
        "eval_loss": 1.9037492275238037,
        "eval_runtime": 1102.9066,
        "eval_samples_per_second": 26.21,
        "eval_steps_per_second": 1.638,
        "epoch": 0.44378843788437883,
        "step": 902
    },
    {
        "loss": 1.8852,
        "grad_norm": 0.5880784392356873,
        "learning_rate": 6.695480365522352e-05,
        "epoch": 0.6656826568265682,
        "step": 1353
    },
    {
        "eval_loss": 1.8703107833862305,
        "eval_runtime": 1103.8364,
        "eval_samples_per_second": 26.188,
        "eval_steps_per_second": 1.637,
        "epoch": 0.6656826568265682,
        "step": 1353
    },
    {
        "loss": 1.8585,
        "grad_norm": 0.6274667978286743,
        "learning_rate": 5.581625092615461e-05,
        "epoch": 0.8875768757687577,
        "step": 1804
    },
    {
        "eval_loss": 1.8478941917419434,
        "eval_runtime": 1103.7709,
        "eval_samples_per_second": 26.189,
        "eval_steps_per_second": 1.637,
        "epoch": 0.8875768757687577,
        "step": 1804
    },
    {
        "loss": 1.8051,
        "grad_norm": 0.6508978009223938,
        "learning_rate": 4.4677698197085704e-05,
        "epoch": 1.1094710947109472,
        "step": 2255
    },
    {
        "eval_loss": 1.835593819618225,
        "eval_runtime": 1103.8475,
        "eval_samples_per_second": 26.187,
        "eval_steps_per_second": 1.637,
        "epoch": 1.1094710947109472,
        "step": 2255
    },
    {
        "loss": 1.7622,
        "grad_norm": 0.6831102967262268,
        "learning_rate": 3.3539145468016795e-05,
        "epoch": 1.3313653136531365,
        "step": 2706
    },
    {
        "eval_loss": 1.8246678113937378,
        "eval_runtime": 1103.4364,
        "eval_samples_per_second": 26.197,
        "eval_steps_per_second": 1.638,
        "epoch": 1.3313653136531365,
        "step": 2706
    },
    {
        "loss": 1.7536,
        "grad_norm": 0.6920585036277771,
        "learning_rate": 2.240059273894789e-05,
        "epoch": 1.5532595325953258,
        "step": 3157
    },
    {
        "eval_loss": 1.8157387971878052,
        "eval_runtime": 1103.7228,
        "eval_samples_per_second": 26.19,
        "eval_steps_per_second": 1.637,
        "epoch": 1.5532595325953258,
        "step": 3157
    },
    {
        "loss": 1.7467,
        "grad_norm": 0.6837635040283203,
        "learning_rate": 1.1262040009878982e-05,
        "epoch": 1.7751537515375153,
        "step": 3608
    },
    {
        "eval_loss": 1.808944821357727,
        "eval_runtime": 1103.6481,
        "eval_samples_per_second": 26.192,
        "eval_steps_per_second": 1.637,
        "epoch": 1.7751537515375153,
        "step": 3608
    },
    {
        "loss": 1.7457,
        "grad_norm": 0.6841686367988586,
        "learning_rate": 1.2348728081007656e-07,
        "epoch": 1.9970479704797048,
        "step": 4059
    },
    {
        "eval_loss": 1.805881142616272,
        "eval_runtime": 1103.2943,
        "eval_samples_per_second": 26.201,
        "eval_steps_per_second": 1.638,
        "epoch": 1.9970479704797048,
        "step": 4059
    },
    {
        "train_runtime": 69487.1891,
        "train_samples_per_second": 7.488,
        "train_steps_per_second": 0.058,
        "total_flos": 1.0874326325169095e+19,
        "train_loss": 1.841461892437747,
        "epoch": 1.9995079950799508,
        "step": 4064
    }
]