jnmrr commited on
Commit
991a653
·
verified ·
1 Parent(s): dd9f3b2

Upload RT-DETRv2 voucher classifier

Browse files
README.md CHANGED
@@ -86,8 +86,8 @@ This model is a fine-tuned version of [PekingU/rtdetr_v2_r101vd](https://hugging
86
  - **Tesoreria receipts**: 0.0000 (needs improvement)
87
 
88
  **Model Confidence:**
89
- - **Digital invoices mean confidence**: 0.5811 (low)
90
- - **Fisico receipts mean confidence**: 0.0000 (low)
91
  - **Tesoreria receipts mean confidence**: 0.0000 (low)
92
 
93
  **Performance by Object Size:**
@@ -114,16 +114,16 @@ This model is a fine-tuned version of [PekingU/rtdetr_v2_r101vd](https://hugging
114
  - **RAM**: 235.9 GB
115
  - **GPU configuration**: H100 optimized
116
 
117
- **Training Time**: 27.6 minutes
118
 
119
  **Training Summary:**
120
- - **Final training loss**: 10.5270
121
  - **Final learning rate**: 1.77e-11
122
 
123
 
124
  ### MLflow Tracking
125
 
126
- - **MLflow Run ID**: 27297b63155142db8c9b015a3205e499
127
  - **MLflow Experiment**: RT-DETRv2_Voucher_Classification
128
 
129
 
 
86
  - **Tesoreria receipts**: 0.0000 (needs improvement)
87
 
88
  **Model Confidence:**
89
+ - **Digital invoices mean confidence**: 0.4218 (low)
90
+ - **Fisico receipts mean confidence**: 0.3837 (low)
91
  - **Tesoreria receipts mean confidence**: 0.0000 (low)
92
 
93
  **Performance by Object Size:**
 
114
  - **RAM**: 235.9 GB
115
  - **GPU configuration**: H100 optimized
116
 
117
+ **Training Time**: 27.0 minutes
118
 
119
  **Training Summary:**
120
+ - **Final training loss**: 10.7460
121
  - **Final learning rate**: 1.77e-11
122
 
123
 
124
  ### MLflow Tracking
125
 
126
+ - **MLflow Run ID**: 6b50f63a6e3144b7a719bbb2b15cb77a
127
  - **MLflow Experiment**: RT-DETRv2_Voucher_Classification
128
 
129
 
checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e15219ede20064681b971044da73d7daf73eec755a5acb92dea3cf8158fc8d9
3
  size 306699044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8016ef1056593c544ea3cc47df3ed4b46437434f1c167bd7c17e98362bf291e
3
  size 306699044
checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0297a542a16615f183c62cb889e5a7f4d3b07fedbf590dac7021616a59cf1d03
3
  size 611580898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecf9a17d55473fe89a13c7be95a701c1739b4ab392380bc62fe3414b18a61724
3
  size 611580898
checkpoint-1000/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 1000,
3
- "best_metric": 65.57749938964844,
4
- "best_model_checkpoint": "rtdetr_voucher_model/checkpoint-1000",
5
  "epoch": 47.61904761904762,
6
  "eval_steps": 200,
7
  "global_step": 1000,
@@ -11,217 +11,217 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.9047619047619047,
14
- "grad_norm": 10800.818359375,
15
  "learning_rate": 7.8e-07,
16
- "loss": 1388.4377,
17
  "step": 40
18
  },
19
  {
20
  "epoch": 3.8095238095238093,
21
- "grad_norm": 21733.138671875,
22
  "learning_rate": 1.5800000000000001e-06,
23
- "loss": 1273.0859,
24
  "step": 80
25
  },
26
  {
27
  "epoch": 5.714285714285714,
28
- "grad_norm": 2858.446044921875,
29
  "learning_rate": 2.38e-06,
30
- "loss": 1063.7887,
31
  "step": 120
32
  },
33
  {
34
  "epoch": 7.619047619047619,
35
- "grad_norm": 4524.0263671875,
36
  "learning_rate": 3.1800000000000005e-06,
37
- "loss": 771.9299,
38
  "step": 160
39
  },
40
  {
41
  "epoch": 9.523809523809524,
42
- "grad_norm": 2449.484130859375,
43
  "learning_rate": 3.980000000000001e-06,
44
- "loss": 519.636,
45
  "step": 200
46
  },
47
  {
48
  "epoch": 9.523809523809524,
49
- "eval_loss": 561.4364624023438,
50
- "eval_runtime": 8.2677,
51
- "eval_samples_per_second": 14.03,
52
- "eval_steps_per_second": 0.484,
53
  "step": 200
54
  },
55
  {
56
  "epoch": 11.428571428571429,
57
- "grad_norm": 3173.60546875,
58
  "learning_rate": 4.78e-06,
59
- "loss": 396.2762,
60
  "step": 240
61
  },
62
  {
63
  "epoch": 13.333333333333334,
64
- "grad_norm": 957.3853149414062,
65
  "learning_rate": 5.580000000000001e-06,
66
- "loss": 317.4443,
67
  "step": 280
68
  },
69
  {
70
  "epoch": 15.238095238095237,
71
- "grad_norm": 2215.865966796875,
72
  "learning_rate": 6.380000000000001e-06,
73
- "loss": 244.5671,
74
  "step": 320
75
  },
76
  {
77
  "epoch": 17.142857142857142,
78
- "grad_norm": 494.6834716796875,
79
  "learning_rate": 7.180000000000001e-06,
80
- "loss": 200.2045,
81
  "step": 360
82
  },
83
  {
84
  "epoch": 19.047619047619047,
85
- "grad_norm": 389.99395751953125,
86
  "learning_rate": 7.980000000000002e-06,
87
- "loss": 143.4672,
88
  "step": 400
89
  },
90
  {
91
  "epoch": 19.047619047619047,
92
- "eval_loss": 212.24693298339844,
93
- "eval_runtime": 7.6261,
94
- "eval_samples_per_second": 15.211,
95
- "eval_steps_per_second": 0.525,
96
  "step": 400
97
  },
98
  {
99
  "epoch": 20.952380952380953,
100
- "grad_norm": 296.1668395996094,
101
  "learning_rate": 8.78e-06,
102
- "loss": 108.6625,
103
  "step": 440
104
  },
105
  {
106
  "epoch": 22.857142857142858,
107
- "grad_norm": 270.3917541503906,
108
  "learning_rate": 9.58e-06,
109
- "loss": 86.1836,
110
  "step": 480
111
  },
112
  {
113
  "epoch": 24.761904761904763,
114
- "grad_norm": 252.61672973632812,
115
  "learning_rate": 9.993604266900276e-06,
116
- "loss": 66.8246,
117
  "step": 520
118
  },
119
  {
120
  "epoch": 26.666666666666668,
121
- "grad_norm": 176.43890380859375,
122
  "learning_rate": 9.938441702975689e-06,
123
- "loss": 52.6602,
124
  "step": 560
125
  },
126
  {
127
  "epoch": 28.571428571428573,
128
- "grad_norm": 143.43080139160156,
129
  "learning_rate": 9.827324621255206e-06,
130
- "loss": 45.2324,
131
  "step": 600
132
  },
133
  {
134
  "epoch": 28.571428571428573,
135
- "eval_loss": 131.0333709716797,
136
- "eval_runtime": 7.5366,
137
- "eval_samples_per_second": 15.392,
138
- "eval_steps_per_second": 0.531,
139
  "step": 600
140
  },
141
  {
142
  "epoch": 30.476190476190474,
143
- "grad_norm": 125.09796142578125,
144
  "learning_rate": 9.661512022674852e-06,
145
- "loss": 36.3571,
146
  "step": 640
147
  },
148
  {
149
  "epoch": 32.38095238095238,
150
- "grad_norm": 130.8366241455078,
151
  "learning_rate": 9.4428826302319e-06,
152
- "loss": 30.4001,
153
  "step": 680
154
  },
155
  {
156
  "epoch": 34.285714285714285,
157
- "grad_norm": 98.4572525024414,
158
  "learning_rate": 9.173913602302594e-06,
159
- "loss": 27.7692,
160
  "step": 720
161
  },
162
  {
163
  "epoch": 36.19047619047619,
164
- "grad_norm": 118.89614868164062,
165
  "learning_rate": 8.857652465447947e-06,
166
- "loss": 24.9675,
167
  "step": 760
168
  },
169
  {
170
  "epoch": 38.095238095238095,
171
- "grad_norm": 89.57968139648438,
172
  "learning_rate": 8.497682584720065e-06,
173
- "loss": 24.3789,
174
  "step": 800
175
  },
176
  {
177
  "epoch": 38.095238095238095,
178
- "eval_loss": 80.99005126953125,
179
- "eval_runtime": 7.2774,
180
- "eval_samples_per_second": 15.94,
181
- "eval_steps_per_second": 0.55,
182
  "step": 800
183
  },
184
  {
185
  "epoch": 40.0,
186
- "grad_norm": 132.4896240234375,
187
  "learning_rate": 8.098082562704253e-06,
188
- "loss": 20.1034,
189
  "step": 840
190
  },
191
  {
192
  "epoch": 41.904761904761905,
193
- "grad_norm": 81.84835815429688,
194
  "learning_rate": 7.663380027322022e-06,
195
- "loss": 18.1971,
196
  "step": 880
197
  },
198
  {
199
  "epoch": 43.80952380952381,
200
- "grad_norm": 55.926551818847656,
201
  "learning_rate": 7.1985003319975696e-06,
202
- "loss": 17.9396,
203
  "step": 920
204
  },
205
  {
206
  "epoch": 45.714285714285715,
207
- "grad_norm": 64.777587890625,
208
  "learning_rate": 6.708710749435427e-06,
209
- "loss": 15.3426,
210
  "step": 960
211
  },
212
  {
213
  "epoch": 47.61904761904762,
214
- "grad_norm": 53.17697525024414,
215
  "learning_rate": 6.199560791316009e-06,
216
- "loss": 16.3762,
217
  "step": 1000
218
  },
219
  {
220
  "epoch": 47.61904761904762,
221
- "eval_loss": 65.57749938964844,
222
- "eval_runtime": 7.054,
223
- "eval_samples_per_second": 16.445,
224
- "eval_steps_per_second": 0.567,
225
  "step": 1000
226
  }
227
  ],
 
1
  {
2
+ "best_global_step": 800,
3
+ "best_metric": 75.64146423339844,
4
+ "best_model_checkpoint": "rtdetr_voucher_model/checkpoint-800",
5
  "epoch": 47.61904761904762,
6
  "eval_steps": 200,
7
  "global_step": 1000,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.9047619047619047,
14
+ "grad_norm": 23553.73828125,
15
  "learning_rate": 7.8e-07,
16
+ "loss": 1390.8067,
17
  "step": 40
18
  },
19
  {
20
  "epoch": 3.8095238095238093,
21
+ "grad_norm": 7427.158203125,
22
  "learning_rate": 1.5800000000000001e-06,
23
+ "loss": 1273.2542,
24
  "step": 80
25
  },
26
  {
27
  "epoch": 5.714285714285714,
28
+ "grad_norm": 6636.22314453125,
29
  "learning_rate": 2.38e-06,
30
+ "loss": 1059.9522,
31
  "step": 120
32
  },
33
  {
34
  "epoch": 7.619047619047619,
35
+ "grad_norm": 1760.4281005859375,
36
  "learning_rate": 3.1800000000000005e-06,
37
+ "loss": 790.0425,
38
  "step": 160
39
  },
40
  {
41
  "epoch": 9.523809523809524,
42
+ "grad_norm": 1625.7744140625,
43
  "learning_rate": 3.980000000000001e-06,
44
+ "loss": 517.1978,
45
  "step": 200
46
  },
47
  {
48
  "epoch": 9.523809523809524,
49
+ "eval_loss": 535.0709838867188,
50
+ "eval_runtime": 8.3595,
51
+ "eval_samples_per_second": 13.876,
52
+ "eval_steps_per_second": 0.478,
53
  "step": 200
54
  },
55
  {
56
  "epoch": 11.428571428571429,
57
+ "grad_norm": 1354.9310302734375,
58
  "learning_rate": 4.78e-06,
59
+ "loss": 353.8703,
60
  "step": 240
61
  },
62
  {
63
  "epoch": 13.333333333333334,
64
+ "grad_norm": 873.0985107421875,
65
  "learning_rate": 5.580000000000001e-06,
66
+ "loss": 262.4233,
67
  "step": 280
68
  },
69
  {
70
  "epoch": 15.238095238095237,
71
+ "grad_norm": 1516.567626953125,
72
  "learning_rate": 6.380000000000001e-06,
73
+ "loss": 205.628,
74
  "step": 320
75
  },
76
  {
77
  "epoch": 17.142857142857142,
78
+ "grad_norm": 5139.3779296875,
79
  "learning_rate": 7.180000000000001e-06,
80
+ "loss": 174.0845,
81
  "step": 360
82
  },
83
  {
84
  "epoch": 19.047619047619047,
85
+ "grad_norm": 480.6575622558594,
86
  "learning_rate": 7.980000000000002e-06,
87
+ "loss": 142.4243,
88
  "step": 400
89
  },
90
  {
91
  "epoch": 19.047619047619047,
92
+ "eval_loss": 214.48715209960938,
93
+ "eval_runtime": 7.9717,
94
+ "eval_samples_per_second": 14.551,
95
+ "eval_steps_per_second": 0.502,
96
  "step": 400
97
  },
98
  {
99
  "epoch": 20.952380952380953,
100
+ "grad_norm": 366.5032958984375,
101
  "learning_rate": 8.78e-06,
102
+ "loss": 115.4496,
103
  "step": 440
104
  },
105
  {
106
  "epoch": 22.857142857142858,
107
+ "grad_norm": 435.5931701660156,
108
  "learning_rate": 9.58e-06,
109
+ "loss": 92.9752,
110
  "step": 480
111
  },
112
  {
113
  "epoch": 24.761904761904763,
114
+ "grad_norm": 232.87234497070312,
115
  "learning_rate": 9.993604266900276e-06,
116
+ "loss": 78.7805,
117
  "step": 520
118
  },
119
  {
120
  "epoch": 26.666666666666668,
121
+ "grad_norm": 188.52627563476562,
122
  "learning_rate": 9.938441702975689e-06,
123
+ "loss": 59.0797,
124
  "step": 560
125
  },
126
  {
127
  "epoch": 28.571428571428573,
128
+ "grad_norm": 177.4958038330078,
129
  "learning_rate": 9.827324621255206e-06,
130
+ "loss": 46.9344,
131
  "step": 600
132
  },
133
  {
134
  "epoch": 28.571428571428573,
135
+ "eval_loss": 129.46548461914062,
136
+ "eval_runtime": 7.2365,
137
+ "eval_samples_per_second": 16.03,
138
+ "eval_steps_per_second": 0.553,
139
  "step": 600
140
  },
141
  {
142
  "epoch": 30.476190476190474,
143
+ "grad_norm": 135.09072875976562,
144
  "learning_rate": 9.661512022674852e-06,
145
+ "loss": 39.1039,
146
  "step": 640
147
  },
148
  {
149
  "epoch": 32.38095238095238,
150
+ "grad_norm": 128.56063842773438,
151
  "learning_rate": 9.4428826302319e-06,
152
+ "loss": 32.5044,
153
  "step": 680
154
  },
155
  {
156
  "epoch": 34.285714285714285,
157
+ "grad_norm": 99.94125366210938,
158
  "learning_rate": 9.173913602302594e-06,
159
+ "loss": 27.9798,
160
  "step": 720
161
  },
162
  {
163
  "epoch": 36.19047619047619,
164
+ "grad_norm": 128.35171508789062,
165
  "learning_rate": 8.857652465447947e-06,
166
+ "loss": 24.9475,
167
  "step": 760
168
  },
169
  {
170
  "epoch": 38.095238095238095,
171
+ "grad_norm": 113.99468231201172,
172
  "learning_rate": 8.497682584720065e-06,
173
+ "loss": 22.38,
174
  "step": 800
175
  },
176
  {
177
  "epoch": 38.095238095238095,
178
+ "eval_loss": 75.64146423339844,
179
+ "eval_runtime": 7.294,
180
+ "eval_samples_per_second": 15.904,
181
+ "eval_steps_per_second": 0.548,
182
  "step": 800
183
  },
184
  {
185
  "epoch": 40.0,
186
+ "grad_norm": 190.8120574951172,
187
  "learning_rate": 8.098082562704253e-06,
188
+ "loss": 19.868,
189
  "step": 840
190
  },
191
  {
192
  "epoch": 41.904761904761905,
193
+ "grad_norm": 58.219482421875,
194
  "learning_rate": 7.663380027322022e-06,
195
+ "loss": 22.0343,
196
  "step": 880
197
  },
198
  {
199
  "epoch": 43.80952380952381,
200
+ "grad_norm": 89.1216049194336,
201
  "learning_rate": 7.1985003319975696e-06,
202
+ "loss": 16.9501,
203
  "step": 920
204
  },
205
  {
206
  "epoch": 45.714285714285715,
207
+ "grad_norm": 147.2601776123047,
208
  "learning_rate": 6.708710749435427e-06,
209
+ "loss": 15.9704,
210
  "step": 960
211
  },
212
  {
213
  "epoch": 47.61904761904762,
214
+ "grad_norm": 103.56001281738281,
215
  "learning_rate": 6.199560791316009e-06,
216
+ "loss": 15.38,
217
  "step": 1000
218
  },
219
  {
220
  "epoch": 47.61904761904762,
221
+ "eval_loss": 79.28771209716797,
222
+ "eval_runtime": 7.3262,
223
+ "eval_samples_per_second": 15.834,
224
+ "eval_steps_per_second": 0.546,
225
  "step": 1000
226
  }
227
  ],
checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0e178d29d8bd1005e0ebacb559c58b57ef56dd83b9d009bde5540a5128a1d4a
3
  size 5777
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba4185c232729e9f26479512d5ae60057198316f129ed28036d583f86d686e96
3
  size 5777
checkpoint-1200/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d408ce3ae88f5fc0fd2cdd76846c1a2f90f63dcb1ee9d633e77e90da0743c34
3
  size 306699044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0cc801e4b2876cb018d78b072e584c108ec7af9da7588b7f7b9d75c799ba940
3
  size 306699044
checkpoint-1200/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4e0185ae80fb21f14573e6ecee06fff8101e23938cb3ecdbca6303f3084883d
3
  size 611580898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f60249dca57809c46c4992bac2974000714ff38516f20e2faa0c142a9841632
3
  size 611580898
checkpoint-1200/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 1200,
3
- "best_metric": 58.122344970703125,
4
  "best_model_checkpoint": "rtdetr_voucher_model/checkpoint-1200",
5
  "epoch": 57.142857142857146,
6
  "eval_steps": 200,
@@ -11,260 +11,260 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.9047619047619047,
14
- "grad_norm": 10800.818359375,
15
  "learning_rate": 7.8e-07,
16
- "loss": 1388.4377,
17
  "step": 40
18
  },
19
  {
20
  "epoch": 3.8095238095238093,
21
- "grad_norm": 21733.138671875,
22
  "learning_rate": 1.5800000000000001e-06,
23
- "loss": 1273.0859,
24
  "step": 80
25
  },
26
  {
27
  "epoch": 5.714285714285714,
28
- "grad_norm": 2858.446044921875,
29
  "learning_rate": 2.38e-06,
30
- "loss": 1063.7887,
31
  "step": 120
32
  },
33
  {
34
  "epoch": 7.619047619047619,
35
- "grad_norm": 4524.0263671875,
36
  "learning_rate": 3.1800000000000005e-06,
37
- "loss": 771.9299,
38
  "step": 160
39
  },
40
  {
41
  "epoch": 9.523809523809524,
42
- "grad_norm": 2449.484130859375,
43
  "learning_rate": 3.980000000000001e-06,
44
- "loss": 519.636,
45
  "step": 200
46
  },
47
  {
48
  "epoch": 9.523809523809524,
49
- "eval_loss": 561.4364624023438,
50
- "eval_runtime": 8.2677,
51
- "eval_samples_per_second": 14.03,
52
- "eval_steps_per_second": 0.484,
53
  "step": 200
54
  },
55
  {
56
  "epoch": 11.428571428571429,
57
- "grad_norm": 3173.60546875,
58
  "learning_rate": 4.78e-06,
59
- "loss": 396.2762,
60
  "step": 240
61
  },
62
  {
63
  "epoch": 13.333333333333334,
64
- "grad_norm": 957.3853149414062,
65
  "learning_rate": 5.580000000000001e-06,
66
- "loss": 317.4443,
67
  "step": 280
68
  },
69
  {
70
  "epoch": 15.238095238095237,
71
- "grad_norm": 2215.865966796875,
72
  "learning_rate": 6.380000000000001e-06,
73
- "loss": 244.5671,
74
  "step": 320
75
  },
76
  {
77
  "epoch": 17.142857142857142,
78
- "grad_norm": 494.6834716796875,
79
  "learning_rate": 7.180000000000001e-06,
80
- "loss": 200.2045,
81
  "step": 360
82
  },
83
  {
84
  "epoch": 19.047619047619047,
85
- "grad_norm": 389.99395751953125,
86
  "learning_rate": 7.980000000000002e-06,
87
- "loss": 143.4672,
88
  "step": 400
89
  },
90
  {
91
  "epoch": 19.047619047619047,
92
- "eval_loss": 212.24693298339844,
93
- "eval_runtime": 7.6261,
94
- "eval_samples_per_second": 15.211,
95
- "eval_steps_per_second": 0.525,
96
  "step": 400
97
  },
98
  {
99
  "epoch": 20.952380952380953,
100
- "grad_norm": 296.1668395996094,
101
  "learning_rate": 8.78e-06,
102
- "loss": 108.6625,
103
  "step": 440
104
  },
105
  {
106
  "epoch": 22.857142857142858,
107
- "grad_norm": 270.3917541503906,
108
  "learning_rate": 9.58e-06,
109
- "loss": 86.1836,
110
  "step": 480
111
  },
112
  {
113
  "epoch": 24.761904761904763,
114
- "grad_norm": 252.61672973632812,
115
  "learning_rate": 9.993604266900276e-06,
116
- "loss": 66.8246,
117
  "step": 520
118
  },
119
  {
120
  "epoch": 26.666666666666668,
121
- "grad_norm": 176.43890380859375,
122
  "learning_rate": 9.938441702975689e-06,
123
- "loss": 52.6602,
124
  "step": 560
125
  },
126
  {
127
  "epoch": 28.571428571428573,
128
- "grad_norm": 143.43080139160156,
129
  "learning_rate": 9.827324621255206e-06,
130
- "loss": 45.2324,
131
  "step": 600
132
  },
133
  {
134
  "epoch": 28.571428571428573,
135
- "eval_loss": 131.0333709716797,
136
- "eval_runtime": 7.5366,
137
- "eval_samples_per_second": 15.392,
138
- "eval_steps_per_second": 0.531,
139
  "step": 600
140
  },
141
  {
142
  "epoch": 30.476190476190474,
143
- "grad_norm": 125.09796142578125,
144
  "learning_rate": 9.661512022674852e-06,
145
- "loss": 36.3571,
146
  "step": 640
147
  },
148
  {
149
  "epoch": 32.38095238095238,
150
- "grad_norm": 130.8366241455078,
151
  "learning_rate": 9.4428826302319e-06,
152
- "loss": 30.4001,
153
  "step": 680
154
  },
155
  {
156
  "epoch": 34.285714285714285,
157
- "grad_norm": 98.4572525024414,
158
  "learning_rate": 9.173913602302594e-06,
159
- "loss": 27.7692,
160
  "step": 720
161
  },
162
  {
163
  "epoch": 36.19047619047619,
164
- "grad_norm": 118.89614868164062,
165
  "learning_rate": 8.857652465447947e-06,
166
- "loss": 24.9675,
167
  "step": 760
168
  },
169
  {
170
  "epoch": 38.095238095238095,
171
- "grad_norm": 89.57968139648438,
172
  "learning_rate": 8.497682584720065e-06,
173
- "loss": 24.3789,
174
  "step": 800
175
  },
176
  {
177
  "epoch": 38.095238095238095,
178
- "eval_loss": 80.99005126953125,
179
- "eval_runtime": 7.2774,
180
- "eval_samples_per_second": 15.94,
181
- "eval_steps_per_second": 0.55,
182
  "step": 800
183
  },
184
  {
185
  "epoch": 40.0,
186
- "grad_norm": 132.4896240234375,
187
  "learning_rate": 8.098082562704253e-06,
188
- "loss": 20.1034,
189
  "step": 840
190
  },
191
  {
192
  "epoch": 41.904761904761905,
193
- "grad_norm": 81.84835815429688,
194
  "learning_rate": 7.663380027322022e-06,
195
- "loss": 18.1971,
196
  "step": 880
197
  },
198
  {
199
  "epoch": 43.80952380952381,
200
- "grad_norm": 55.926551818847656,
201
  "learning_rate": 7.1985003319975696e-06,
202
- "loss": 17.9396,
203
  "step": 920
204
  },
205
  {
206
  "epoch": 45.714285714285715,
207
- "grad_norm": 64.777587890625,
208
  "learning_rate": 6.708710749435427e-06,
209
- "loss": 15.3426,
210
  "step": 960
211
  },
212
  {
213
  "epoch": 47.61904761904762,
214
- "grad_norm": 53.17697525024414,
215
  "learning_rate": 6.199560791316009e-06,
216
- "loss": 16.3762,
217
  "step": 1000
218
  },
219
  {
220
  "epoch": 47.61904761904762,
221
- "eval_loss": 65.57749938964844,
222
- "eval_runtime": 7.054,
223
- "eval_samples_per_second": 16.445,
224
- "eval_steps_per_second": 0.567,
225
  "step": 1000
226
  },
227
  {
228
  "epoch": 49.523809523809526,
229
- "grad_norm": 140.15521240234375,
230
  "learning_rate": 5.676819330110808e-06,
231
- "loss": 13.8692,
232
  "step": 1040
233
  },
234
  {
235
  "epoch": 51.42857142857143,
236
- "grad_norm": 61.619049072265625,
237
  "learning_rate": 5.14640923545217e-06,
238
- "loss": 15.6367,
239
  "step": 1080
240
  },
241
  {
242
  "epoch": 53.333333333333336,
243
- "grad_norm": 147.7701873779297,
244
  "learning_rate": 4.614340265653695e-06,
245
- "loss": 13.0418,
246
  "step": 1120
247
  },
248
  {
249
  "epoch": 55.23809523809524,
250
- "grad_norm": 77.45187377929688,
251
  "learning_rate": 4.086640974747176e-06,
252
- "loss": 12.7083,
253
  "step": 1160
254
  },
255
  {
256
  "epoch": 57.142857142857146,
257
- "grad_norm": 215.86756896972656,
258
  "learning_rate": 3.569290406556519e-06,
259
- "loss": 13.65,
260
  "step": 1200
261
  },
262
  {
263
  "epoch": 57.142857142857146,
264
- "eval_loss": 58.122344970703125,
265
- "eval_runtime": 7.0277,
266
- "eval_samples_per_second": 16.506,
267
- "eval_steps_per_second": 0.569,
268
  "step": 1200
269
  }
270
  ],
 
1
  {
2
  "best_global_step": 1200,
3
+ "best_metric": 64.76508331298828,
4
  "best_model_checkpoint": "rtdetr_voucher_model/checkpoint-1200",
5
  "epoch": 57.142857142857146,
6
  "eval_steps": 200,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.9047619047619047,
14
+ "grad_norm": 23553.73828125,
15
  "learning_rate": 7.8e-07,
16
+ "loss": 1390.8067,
17
  "step": 40
18
  },
19
  {
20
  "epoch": 3.8095238095238093,
21
+ "grad_norm": 7427.158203125,
22
  "learning_rate": 1.5800000000000001e-06,
23
+ "loss": 1273.2542,
24
  "step": 80
25
  },
26
  {
27
  "epoch": 5.714285714285714,
28
+ "grad_norm": 6636.22314453125,
29
  "learning_rate": 2.38e-06,
30
+ "loss": 1059.9522,
31
  "step": 120
32
  },
33
  {
34
  "epoch": 7.619047619047619,
35
+ "grad_norm": 1760.4281005859375,
36
  "learning_rate": 3.1800000000000005e-06,
37
+ "loss": 790.0425,
38
  "step": 160
39
  },
40
  {
41
  "epoch": 9.523809523809524,
42
+ "grad_norm": 1625.7744140625,
43
  "learning_rate": 3.980000000000001e-06,
44
+ "loss": 517.1978,
45
  "step": 200
46
  },
47
  {
48
  "epoch": 9.523809523809524,
49
+ "eval_loss": 535.0709838867188,
50
+ "eval_runtime": 8.3595,
51
+ "eval_samples_per_second": 13.876,
52
+ "eval_steps_per_second": 0.478,
53
  "step": 200
54
  },
55
  {
56
  "epoch": 11.428571428571429,
57
+ "grad_norm": 1354.9310302734375,
58
  "learning_rate": 4.78e-06,
59
+ "loss": 353.8703,
60
  "step": 240
61
  },
62
  {
63
  "epoch": 13.333333333333334,
64
+ "grad_norm": 873.0985107421875,
65
  "learning_rate": 5.580000000000001e-06,
66
+ "loss": 262.4233,
67
  "step": 280
68
  },
69
  {
70
  "epoch": 15.238095238095237,
71
+ "grad_norm": 1516.567626953125,
72
  "learning_rate": 6.380000000000001e-06,
73
+ "loss": 205.628,
74
  "step": 320
75
  },
76
  {
77
  "epoch": 17.142857142857142,
78
+ "grad_norm": 5139.3779296875,
79
  "learning_rate": 7.180000000000001e-06,
80
+ "loss": 174.0845,
81
  "step": 360
82
  },
83
  {
84
  "epoch": 19.047619047619047,
85
+ "grad_norm": 480.6575622558594,
86
  "learning_rate": 7.980000000000002e-06,
87
+ "loss": 142.4243,
88
  "step": 400
89
  },
90
  {
91
  "epoch": 19.047619047619047,
92
+ "eval_loss": 214.48715209960938,
93
+ "eval_runtime": 7.9717,
94
+ "eval_samples_per_second": 14.551,
95
+ "eval_steps_per_second": 0.502,
96
  "step": 400
97
  },
98
  {
99
  "epoch": 20.952380952380953,
100
+ "grad_norm": 366.5032958984375,
101
  "learning_rate": 8.78e-06,
102
+ "loss": 115.4496,
103
  "step": 440
104
  },
105
  {
106
  "epoch": 22.857142857142858,
107
+ "grad_norm": 435.5931701660156,
108
  "learning_rate": 9.58e-06,
109
+ "loss": 92.9752,
110
  "step": 480
111
  },
112
  {
113
  "epoch": 24.761904761904763,
114
+ "grad_norm": 232.87234497070312,
115
  "learning_rate": 9.993604266900276e-06,
116
+ "loss": 78.7805,
117
  "step": 520
118
  },
119
  {
120
  "epoch": 26.666666666666668,
121
+ "grad_norm": 188.52627563476562,
122
  "learning_rate": 9.938441702975689e-06,
123
+ "loss": 59.0797,
124
  "step": 560
125
  },
126
  {
127
  "epoch": 28.571428571428573,
128
+ "grad_norm": 177.4958038330078,
129
  "learning_rate": 9.827324621255206e-06,
130
+ "loss": 46.9344,
131
  "step": 600
132
  },
133
  {
134
  "epoch": 28.571428571428573,
135
+ "eval_loss": 129.46548461914062,
136
+ "eval_runtime": 7.2365,
137
+ "eval_samples_per_second": 16.03,
138
+ "eval_steps_per_second": 0.553,
139
  "step": 600
140
  },
141
  {
142
  "epoch": 30.476190476190474,
143
+ "grad_norm": 135.09072875976562,
144
  "learning_rate": 9.661512022674852e-06,
145
+ "loss": 39.1039,
146
  "step": 640
147
  },
148
  {
149
  "epoch": 32.38095238095238,
150
+ "grad_norm": 128.56063842773438,
151
  "learning_rate": 9.4428826302319e-06,
152
+ "loss": 32.5044,
153
  "step": 680
154
  },
155
  {
156
  "epoch": 34.285714285714285,
157
+ "grad_norm": 99.94125366210938,
158
  "learning_rate": 9.173913602302594e-06,
159
+ "loss": 27.9798,
160
  "step": 720
161
  },
162
  {
163
  "epoch": 36.19047619047619,
164
+ "grad_norm": 128.35171508789062,
165
  "learning_rate": 8.857652465447947e-06,
166
+ "loss": 24.9475,
167
  "step": 760
168
  },
169
  {
170
  "epoch": 38.095238095238095,
171
+ "grad_norm": 113.99468231201172,
172
  "learning_rate": 8.497682584720065e-06,
173
+ "loss": 22.38,
174
  "step": 800
175
  },
176
  {
177
  "epoch": 38.095238095238095,
178
+ "eval_loss": 75.64146423339844,
179
+ "eval_runtime": 7.294,
180
+ "eval_samples_per_second": 15.904,
181
+ "eval_steps_per_second": 0.548,
182
  "step": 800
183
  },
184
  {
185
  "epoch": 40.0,
186
+ "grad_norm": 190.8120574951172,
187
  "learning_rate": 8.098082562704253e-06,
188
+ "loss": 19.868,
189
  "step": 840
190
  },
191
  {
192
  "epoch": 41.904761904761905,
193
+ "grad_norm": 58.219482421875,
194
  "learning_rate": 7.663380027322022e-06,
195
+ "loss": 22.0343,
196
  "step": 880
197
  },
198
  {
199
  "epoch": 43.80952380952381,
200
+ "grad_norm": 89.1216049194336,
201
  "learning_rate": 7.1985003319975696e-06,
202
+ "loss": 16.9501,
203
  "step": 920
204
  },
205
  {
206
  "epoch": 45.714285714285715,
207
+ "grad_norm": 147.2601776123047,
208
  "learning_rate": 6.708710749435427e-06,
209
+ "loss": 15.9704,
210
  "step": 960
211
  },
212
  {
213
  "epoch": 47.61904761904762,
214
+ "grad_norm": 103.56001281738281,
215
  "learning_rate": 6.199560791316009e-06,
216
+ "loss": 15.38,
217
  "step": 1000
218
  },
219
  {
220
  "epoch": 47.61904761904762,
221
+ "eval_loss": 79.28771209716797,
222
+ "eval_runtime": 7.3262,
223
+ "eval_samples_per_second": 15.834,
224
+ "eval_steps_per_second": 0.546,
225
  "step": 1000
226
  },
227
  {
228
  "epoch": 49.523809523809526,
229
+ "grad_norm": 49.25249099731445,
230
  "learning_rate": 5.676819330110808e-06,
231
+ "loss": 14.5406,
232
  "step": 1040
233
  },
234
  {
235
  "epoch": 51.42857142857143,
236
+ "grad_norm": 116.98213195800781,
237
  "learning_rate": 5.14640923545217e-06,
238
+ "loss": 14.0173,
239
  "step": 1080
240
  },
241
  {
242
  "epoch": 53.333333333333336,
243
+ "grad_norm": 72.93340301513672,
244
  "learning_rate": 4.614340265653695e-06,
245
+ "loss": 13.7052,
246
  "step": 1120
247
  },
248
  {
249
  "epoch": 55.23809523809524,
250
+ "grad_norm": 105.87384796142578,
251
  "learning_rate": 4.086640974747176e-06,
252
+ "loss": 15.4002,
253
  "step": 1160
254
  },
255
  {
256
  "epoch": 57.142857142857146,
257
+ "grad_norm": 59.23909378051758,
258
  "learning_rate": 3.569290406556519e-06,
259
+ "loss": 14.1171,
260
  "step": 1200
261
  },
262
  {
263
  "epoch": 57.142857142857146,
264
+ "eval_loss": 64.76508331298828,
265
+ "eval_runtime": 7.1521,
266
+ "eval_samples_per_second": 16.219,
267
+ "eval_steps_per_second": 0.559,
268
  "step": 1200
269
  }
270
  ],
checkpoint-1200/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0e178d29d8bd1005e0ebacb559c58b57ef56dd83b9d009bde5540a5128a1d4a
3
  size 5777
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba4185c232729e9f26479512d5ae60057198316f129ed28036d583f86d686e96
3
  size 5777
checkpoint-1400/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9857953e8c6751ca6b9433ec63d899eda71d75aad98c054ee0415c1dc3b37be2
3
  size 306699044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04b94f7cad04f0747f4fc31287436697a425f647e5aefc95be51e2c7a837b4c9
3
  size 306699044
checkpoint-1400/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af83e894e9db3ea3ef841289f868d2ae7b8734b566076e82fec88fb48d45a921
3
  size 611580898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5b3060dd15542402c6e8fb677b15552983f39dcb3f4878c90473c7cacf46766
3
  size 611580898
checkpoint-1400/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 1200,
3
- "best_metric": 58.122344970703125,
4
  "best_model_checkpoint": "rtdetr_voucher_model/checkpoint-1200",
5
  "epoch": 66.66666666666667,
6
  "eval_steps": 200,
@@ -11,303 +11,303 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.9047619047619047,
14
- "grad_norm": 10800.818359375,
15
  "learning_rate": 7.8e-07,
16
- "loss": 1388.4377,
17
  "step": 40
18
  },
19
  {
20
  "epoch": 3.8095238095238093,
21
- "grad_norm": 21733.138671875,
22
  "learning_rate": 1.5800000000000001e-06,
23
- "loss": 1273.0859,
24
  "step": 80
25
  },
26
  {
27
  "epoch": 5.714285714285714,
28
- "grad_norm": 2858.446044921875,
29
  "learning_rate": 2.38e-06,
30
- "loss": 1063.7887,
31
  "step": 120
32
  },
33
  {
34
  "epoch": 7.619047619047619,
35
- "grad_norm": 4524.0263671875,
36
  "learning_rate": 3.1800000000000005e-06,
37
- "loss": 771.9299,
38
  "step": 160
39
  },
40
  {
41
  "epoch": 9.523809523809524,
42
- "grad_norm": 2449.484130859375,
43
  "learning_rate": 3.980000000000001e-06,
44
- "loss": 519.636,
45
  "step": 200
46
  },
47
  {
48
  "epoch": 9.523809523809524,
49
- "eval_loss": 561.4364624023438,
50
- "eval_runtime": 8.2677,
51
- "eval_samples_per_second": 14.03,
52
- "eval_steps_per_second": 0.484,
53
  "step": 200
54
  },
55
  {
56
  "epoch": 11.428571428571429,
57
- "grad_norm": 3173.60546875,
58
  "learning_rate": 4.78e-06,
59
- "loss": 396.2762,
60
  "step": 240
61
  },
62
  {
63
  "epoch": 13.333333333333334,
64
- "grad_norm": 957.3853149414062,
65
  "learning_rate": 5.580000000000001e-06,
66
- "loss": 317.4443,
67
  "step": 280
68
  },
69
  {
70
  "epoch": 15.238095238095237,
71
- "grad_norm": 2215.865966796875,
72
  "learning_rate": 6.380000000000001e-06,
73
- "loss": 244.5671,
74
  "step": 320
75
  },
76
  {
77
  "epoch": 17.142857142857142,
78
- "grad_norm": 494.6834716796875,
79
  "learning_rate": 7.180000000000001e-06,
80
- "loss": 200.2045,
81
  "step": 360
82
  },
83
  {
84
  "epoch": 19.047619047619047,
85
- "grad_norm": 389.99395751953125,
86
  "learning_rate": 7.980000000000002e-06,
87
- "loss": 143.4672,
88
  "step": 400
89
  },
90
  {
91
  "epoch": 19.047619047619047,
92
- "eval_loss": 212.24693298339844,
93
- "eval_runtime": 7.6261,
94
- "eval_samples_per_second": 15.211,
95
- "eval_steps_per_second": 0.525,
96
  "step": 400
97
  },
98
  {
99
  "epoch": 20.952380952380953,
100
- "grad_norm": 296.1668395996094,
101
  "learning_rate": 8.78e-06,
102
- "loss": 108.6625,
103
  "step": 440
104
  },
105
  {
106
  "epoch": 22.857142857142858,
107
- "grad_norm": 270.3917541503906,
108
  "learning_rate": 9.58e-06,
109
- "loss": 86.1836,
110
  "step": 480
111
  },
112
  {
113
  "epoch": 24.761904761904763,
114
- "grad_norm": 252.61672973632812,
115
  "learning_rate": 9.993604266900276e-06,
116
- "loss": 66.8246,
117
  "step": 520
118
  },
119
  {
120
  "epoch": 26.666666666666668,
121
- "grad_norm": 176.43890380859375,
122
  "learning_rate": 9.938441702975689e-06,
123
- "loss": 52.6602,
124
  "step": 560
125
  },
126
  {
127
  "epoch": 28.571428571428573,
128
- "grad_norm": 143.43080139160156,
129
  "learning_rate": 9.827324621255206e-06,
130
- "loss": 45.2324,
131
  "step": 600
132
  },
133
  {
134
  "epoch": 28.571428571428573,
135
- "eval_loss": 131.0333709716797,
136
- "eval_runtime": 7.5366,
137
- "eval_samples_per_second": 15.392,
138
- "eval_steps_per_second": 0.531,
139
  "step": 600
140
  },
141
  {
142
  "epoch": 30.476190476190474,
143
- "grad_norm": 125.09796142578125,
144
  "learning_rate": 9.661512022674852e-06,
145
- "loss": 36.3571,
146
  "step": 640
147
  },
148
  {
149
  "epoch": 32.38095238095238,
150
- "grad_norm": 130.8366241455078,
151
  "learning_rate": 9.4428826302319e-06,
152
- "loss": 30.4001,
153
  "step": 680
154
  },
155
  {
156
  "epoch": 34.285714285714285,
157
- "grad_norm": 98.4572525024414,
158
  "learning_rate": 9.173913602302594e-06,
159
- "loss": 27.7692,
160
  "step": 720
161
  },
162
  {
163
  "epoch": 36.19047619047619,
164
- "grad_norm": 118.89614868164062,
165
  "learning_rate": 8.857652465447947e-06,
166
- "loss": 24.9675,
167
  "step": 760
168
  },
169
  {
170
  "epoch": 38.095238095238095,
171
- "grad_norm": 89.57968139648438,
172
  "learning_rate": 8.497682584720065e-06,
173
- "loss": 24.3789,
174
  "step": 800
175
  },
176
  {
177
  "epoch": 38.095238095238095,
178
- "eval_loss": 80.99005126953125,
179
- "eval_runtime": 7.2774,
180
- "eval_samples_per_second": 15.94,
181
- "eval_steps_per_second": 0.55,
182
  "step": 800
183
  },
184
  {
185
  "epoch": 40.0,
186
- "grad_norm": 132.4896240234375,
187
  "learning_rate": 8.098082562704253e-06,
188
- "loss": 20.1034,
189
  "step": 840
190
  },
191
  {
192
  "epoch": 41.904761904761905,
193
- "grad_norm": 81.84835815429688,
194
  "learning_rate": 7.663380027322022e-06,
195
- "loss": 18.1971,
196
  "step": 880
197
  },
198
  {
199
  "epoch": 43.80952380952381,
200
- "grad_norm": 55.926551818847656,
201
  "learning_rate": 7.1985003319975696e-06,
202
- "loss": 17.9396,
203
  "step": 920
204
  },
205
  {
206
  "epoch": 45.714285714285715,
207
- "grad_norm": 64.777587890625,
208
  "learning_rate": 6.708710749435427e-06,
209
- "loss": 15.3426,
210
  "step": 960
211
  },
212
  {
213
  "epoch": 47.61904761904762,
214
- "grad_norm": 53.17697525024414,
215
  "learning_rate": 6.199560791316009e-06,
216
- "loss": 16.3762,
217
  "step": 1000
218
  },
219
  {
220
  "epoch": 47.61904761904762,
221
- "eval_loss": 65.57749938964844,
222
- "eval_runtime": 7.054,
223
- "eval_samples_per_second": 16.445,
224
- "eval_steps_per_second": 0.567,
225
  "step": 1000
226
  },
227
  {
228
  "epoch": 49.523809523809526,
229
- "grad_norm": 140.15521240234375,
230
  "learning_rate": 5.676819330110808e-06,
231
- "loss": 13.8692,
232
  "step": 1040
233
  },
234
  {
235
  "epoch": 51.42857142857143,
236
- "grad_norm": 61.619049072265625,
237
  "learning_rate": 5.14640923545217e-06,
238
- "loss": 15.6367,
239
  "step": 1080
240
  },
241
  {
242
  "epoch": 53.333333333333336,
243
- "grad_norm": 147.7701873779297,
244
  "learning_rate": 4.614340265653695e-06,
245
- "loss": 13.0418,
246
  "step": 1120
247
  },
248
  {
249
  "epoch": 55.23809523809524,
250
- "grad_norm": 77.45187377929688,
251
  "learning_rate": 4.086640974747176e-06,
252
- "loss": 12.7083,
253
  "step": 1160
254
  },
255
  {
256
  "epoch": 57.142857142857146,
257
- "grad_norm": 215.86756896972656,
258
  "learning_rate": 3.569290406556519e-06,
259
- "loss": 13.65,
260
  "step": 1200
261
  },
262
  {
263
  "epoch": 57.142857142857146,
264
- "eval_loss": 58.122344970703125,
265
- "eval_runtime": 7.0277,
266
- "eval_samples_per_second": 16.506,
267
- "eval_steps_per_second": 0.569,
268
  "step": 1200
269
  },
270
  {
271
  "epoch": 59.04761904761905,
272
- "grad_norm": 76.79309844970703,
273
  "learning_rate": 3.068150349742125e-06,
274
- "loss": 11.9538,
275
  "step": 1240
276
  },
277
  {
278
  "epoch": 60.95238095238095,
279
- "grad_norm": 52.94865417480469,
280
  "learning_rate": 2.5888989213931047e-06,
281
- "loss": 11.5392,
282
  "step": 1280
283
  },
284
  {
285
  "epoch": 62.857142857142854,
286
- "grad_norm": 153.39662170410156,
287
  "learning_rate": 2.1369662316918095e-06,
288
- "loss": 12.1913,
289
  "step": 1320
290
  },
291
  {
292
  "epoch": 64.76190476190476,
293
- "grad_norm": 139.7332305908203,
294
  "learning_rate": 1.717472858595694e-06,
295
- "loss": 11.2592,
296
  "step": 1360
297
  },
298
  {
299
  "epoch": 66.66666666666667,
300
- "grad_norm": 39.22611999511719,
301
  "learning_rate": 1.335171829642984e-06,
302
- "loss": 11.1986,
303
  "step": 1400
304
  },
305
  {
306
  "epoch": 66.66666666666667,
307
- "eval_loss": 60.4874382019043,
308
- "eval_runtime": 7.0545,
309
- "eval_samples_per_second": 16.443,
310
- "eval_steps_per_second": 0.567,
311
  "step": 1400
312
  }
313
  ],
 
1
  {
2
  "best_global_step": 1200,
3
+ "best_metric": 64.76508331298828,
4
  "best_model_checkpoint": "rtdetr_voucher_model/checkpoint-1200",
5
  "epoch": 66.66666666666667,
6
  "eval_steps": 200,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.9047619047619047,
14
+ "grad_norm": 23553.73828125,
15
  "learning_rate": 7.8e-07,
16
+ "loss": 1390.8067,
17
  "step": 40
18
  },
19
  {
20
  "epoch": 3.8095238095238093,
21
+ "grad_norm": 7427.158203125,
22
  "learning_rate": 1.5800000000000001e-06,
23
+ "loss": 1273.2542,
24
  "step": 80
25
  },
26
  {
27
  "epoch": 5.714285714285714,
28
+ "grad_norm": 6636.22314453125,
29
  "learning_rate": 2.38e-06,
30
+ "loss": 1059.9522,
31
  "step": 120
32
  },
33
  {
34
  "epoch": 7.619047619047619,
35
+ "grad_norm": 1760.4281005859375,
36
  "learning_rate": 3.1800000000000005e-06,
37
+ "loss": 790.0425,
38
  "step": 160
39
  },
40
  {
41
  "epoch": 9.523809523809524,
42
+ "grad_norm": 1625.7744140625,
43
  "learning_rate": 3.980000000000001e-06,
44
+ "loss": 517.1978,
45
  "step": 200
46
  },
47
  {
48
  "epoch": 9.523809523809524,
49
+ "eval_loss": 535.0709838867188,
50
+ "eval_runtime": 8.3595,
51
+ "eval_samples_per_second": 13.876,
52
+ "eval_steps_per_second": 0.478,
53
  "step": 200
54
  },
55
  {
56
  "epoch": 11.428571428571429,
57
+ "grad_norm": 1354.9310302734375,
58
  "learning_rate": 4.78e-06,
59
+ "loss": 353.8703,
60
  "step": 240
61
  },
62
  {
63
  "epoch": 13.333333333333334,
64
+ "grad_norm": 873.0985107421875,
65
  "learning_rate": 5.580000000000001e-06,
66
+ "loss": 262.4233,
67
  "step": 280
68
  },
69
  {
70
  "epoch": 15.238095238095237,
71
+ "grad_norm": 1516.567626953125,
72
  "learning_rate": 6.380000000000001e-06,
73
+ "loss": 205.628,
74
  "step": 320
75
  },
76
  {
77
  "epoch": 17.142857142857142,
78
+ "grad_norm": 5139.3779296875,
79
  "learning_rate": 7.180000000000001e-06,
80
+ "loss": 174.0845,
81
  "step": 360
82
  },
83
  {
84
  "epoch": 19.047619047619047,
85
+ "grad_norm": 480.6575622558594,
86
  "learning_rate": 7.980000000000002e-06,
87
+ "loss": 142.4243,
88
  "step": 400
89
  },
90
  {
91
  "epoch": 19.047619047619047,
92
+ "eval_loss": 214.48715209960938,
93
+ "eval_runtime": 7.9717,
94
+ "eval_samples_per_second": 14.551,
95
+ "eval_steps_per_second": 0.502,
96
  "step": 400
97
  },
98
  {
99
  "epoch": 20.952380952380953,
100
+ "grad_norm": 366.5032958984375,
101
  "learning_rate": 8.78e-06,
102
+ "loss": 115.4496,
103
  "step": 440
104
  },
105
  {
106
  "epoch": 22.857142857142858,
107
+ "grad_norm": 435.5931701660156,
108
  "learning_rate": 9.58e-06,
109
+ "loss": 92.9752,
110
  "step": 480
111
  },
112
  {
113
  "epoch": 24.761904761904763,
114
+ "grad_norm": 232.87234497070312,
115
  "learning_rate": 9.993604266900276e-06,
116
+ "loss": 78.7805,
117
  "step": 520
118
  },
119
  {
120
  "epoch": 26.666666666666668,
121
+ "grad_norm": 188.52627563476562,
122
  "learning_rate": 9.938441702975689e-06,
123
+ "loss": 59.0797,
124
  "step": 560
125
  },
126
  {
127
  "epoch": 28.571428571428573,
128
+ "grad_norm": 177.4958038330078,
129
  "learning_rate": 9.827324621255206e-06,
130
+ "loss": 46.9344,
131
  "step": 600
132
  },
133
  {
134
  "epoch": 28.571428571428573,
135
+ "eval_loss": 129.46548461914062,
136
+ "eval_runtime": 7.2365,
137
+ "eval_samples_per_second": 16.03,
138
+ "eval_steps_per_second": 0.553,
139
  "step": 600
140
  },
141
  {
142
  "epoch": 30.476190476190474,
143
+ "grad_norm": 135.09072875976562,
144
  "learning_rate": 9.661512022674852e-06,
145
+ "loss": 39.1039,
146
  "step": 640
147
  },
148
  {
149
  "epoch": 32.38095238095238,
150
+ "grad_norm": 128.56063842773438,
151
  "learning_rate": 9.4428826302319e-06,
152
+ "loss": 32.5044,
153
  "step": 680
154
  },
155
  {
156
  "epoch": 34.285714285714285,
157
+ "grad_norm": 99.94125366210938,
158
  "learning_rate": 9.173913602302594e-06,
159
+ "loss": 27.9798,
160
  "step": 720
161
  },
162
  {
163
  "epoch": 36.19047619047619,
164
+ "grad_norm": 128.35171508789062,
165
  "learning_rate": 8.857652465447947e-06,
166
+ "loss": 24.9475,
167
  "step": 760
168
  },
169
  {
170
  "epoch": 38.095238095238095,
171
+ "grad_norm": 113.99468231201172,
172
  "learning_rate": 8.497682584720065e-06,
173
+ "loss": 22.38,
174
  "step": 800
175
  },
176
  {
177
  "epoch": 38.095238095238095,
178
+ "eval_loss": 75.64146423339844,
179
+ "eval_runtime": 7.294,
180
+ "eval_samples_per_second": 15.904,
181
+ "eval_steps_per_second": 0.548,
182
  "step": 800
183
  },
184
  {
185
  "epoch": 40.0,
186
+ "grad_norm": 190.8120574951172,
187
  "learning_rate": 8.098082562704253e-06,
188
+ "loss": 19.868,
189
  "step": 840
190
  },
191
  {
192
  "epoch": 41.904761904761905,
193
+ "grad_norm": 58.219482421875,
194
  "learning_rate": 7.663380027322022e-06,
195
+ "loss": 22.0343,
196
  "step": 880
197
  },
198
  {
199
  "epoch": 43.80952380952381,
200
+ "grad_norm": 89.1216049194336,
201
  "learning_rate": 7.1985003319975696e-06,
202
+ "loss": 16.9501,
203
  "step": 920
204
  },
205
  {
206
  "epoch": 45.714285714285715,
207
+ "grad_norm": 147.2601776123047,
208
  "learning_rate": 6.708710749435427e-06,
209
+ "loss": 15.9704,
210
  "step": 960
211
  },
212
  {
213
  "epoch": 47.61904761904762,
214
+ "grad_norm": 103.56001281738281,
215
  "learning_rate": 6.199560791316009e-06,
216
+ "loss": 15.38,
217
  "step": 1000
218
  },
219
  {
220
  "epoch": 47.61904761904762,
221
+ "eval_loss": 79.28771209716797,
222
+ "eval_runtime": 7.3262,
223
+ "eval_samples_per_second": 15.834,
224
+ "eval_steps_per_second": 0.546,
225
  "step": 1000
226
  },
227
  {
228
  "epoch": 49.523809523809526,
229
+ "grad_norm": 49.25249099731445,
230
  "learning_rate": 5.676819330110808e-06,
231
+ "loss": 14.5406,
232
  "step": 1040
233
  },
234
  {
235
  "epoch": 51.42857142857143,
236
+ "grad_norm": 116.98213195800781,
237
  "learning_rate": 5.14640923545217e-06,
238
+ "loss": 14.0173,
239
  "step": 1080
240
  },
241
  {
242
  "epoch": 53.333333333333336,
243
+ "grad_norm": 72.93340301513672,
244
  "learning_rate": 4.614340265653695e-06,
245
+ "loss": 13.7052,
246
  "step": 1120
247
  },
248
  {
249
  "epoch": 55.23809523809524,
250
+ "grad_norm": 105.87384796142578,
251
  "learning_rate": 4.086640974747176e-06,
252
+ "loss": 15.4002,
253
  "step": 1160
254
  },
255
  {
256
  "epoch": 57.142857142857146,
257
+ "grad_norm": 59.23909378051758,
258
  "learning_rate": 3.569290406556519e-06,
259
+ "loss": 14.1171,
260
  "step": 1200
261
  },
262
  {
263
  "epoch": 57.142857142857146,
264
+ "eval_loss": 64.76508331298828,
265
+ "eval_runtime": 7.1521,
266
+ "eval_samples_per_second": 16.219,
267
+ "eval_steps_per_second": 0.559,
268
  "step": 1200
269
  },
270
  {
271
  "epoch": 59.04761904761905,
272
+ "grad_norm": 98.89187622070312,
273
  "learning_rate": 3.068150349742125e-06,
274
+ "loss": 12.3597,
275
  "step": 1240
276
  },
277
  {
278
  "epoch": 60.95238095238095,
279
+ "grad_norm": 179.5811004638672,
280
  "learning_rate": 2.5888989213931047e-06,
281
+ "loss": 12.125,
282
  "step": 1280
283
  },
284
  {
285
  "epoch": 62.857142857142854,
286
+ "grad_norm": 41.74519729614258,
287
  "learning_rate": 2.1369662316918095e-06,
288
+ "loss": 11.7567,
289
  "step": 1320
290
  },
291
  {
292
  "epoch": 64.76190476190476,
293
+ "grad_norm": 60.97040557861328,
294
  "learning_rate": 1.717472858595694e-06,
295
+ "loss": 12.3442,
296
  "step": 1360
297
  },
298
  {
299
  "epoch": 66.66666666666667,
300
+ "grad_norm": 197.75689697265625,
301
  "learning_rate": 1.335171829642984e-06,
302
+ "loss": 11.2475,
303
  "step": 1400
304
  },
305
  {
306
  "epoch": 66.66666666666667,
307
+ "eval_loss": 67.15937805175781,
308
+ "eval_runtime": 7.2137,
309
+ "eval_samples_per_second": 16.08,
310
+ "eval_steps_per_second": 0.554,
311
  "step": 1400
312
  }
313
  ],
checkpoint-1400/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0e178d29d8bd1005e0ebacb559c58b57ef56dd83b9d009bde5540a5128a1d4a
3
  size 5777
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba4185c232729e9f26479512d5ae60057198316f129ed28036d583f86d686e96
3
  size 5777
checkpoint-1600/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fc83e9ffb9854d6f50d18b71d3610cb9b411e48e241f6e96354f2fd615e20c4
3
  size 306699044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26f7b879ae25a9c83fa3e849fdb085366c9c812655437eeedbdf446b895a6cc5
3
  size 306699044
checkpoint-1600/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eeced65cec309962d348ad3cc1ce6362b5a5185260bc0875e65a5f20507842a8
3
  size 611580898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42bd1f3c13814d5212492c77c1c29fe168190fb0ed3c50250e6af351070f0d2b
3
  size 611580898
checkpoint-1600/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 1600,
3
- "best_metric": 56.9376335144043,
4
  "best_model_checkpoint": "rtdetr_voucher_model/checkpoint-1600",
5
  "epoch": 76.19047619047619,
6
  "eval_steps": 200,
@@ -11,346 +11,346 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.9047619047619047,
14
- "grad_norm": 10800.818359375,
15
  "learning_rate": 7.8e-07,
16
- "loss": 1388.4377,
17
  "step": 40
18
  },
19
  {
20
  "epoch": 3.8095238095238093,
21
- "grad_norm": 21733.138671875,
22
  "learning_rate": 1.5800000000000001e-06,
23
- "loss": 1273.0859,
24
  "step": 80
25
  },
26
  {
27
  "epoch": 5.714285714285714,
28
- "grad_norm": 2858.446044921875,
29
  "learning_rate": 2.38e-06,
30
- "loss": 1063.7887,
31
  "step": 120
32
  },
33
  {
34
  "epoch": 7.619047619047619,
35
- "grad_norm": 4524.0263671875,
36
  "learning_rate": 3.1800000000000005e-06,
37
- "loss": 771.9299,
38
  "step": 160
39
  },
40
  {
41
  "epoch": 9.523809523809524,
42
- "grad_norm": 2449.484130859375,
43
  "learning_rate": 3.980000000000001e-06,
44
- "loss": 519.636,
45
  "step": 200
46
  },
47
  {
48
  "epoch": 9.523809523809524,
49
- "eval_loss": 561.4364624023438,
50
- "eval_runtime": 8.2677,
51
- "eval_samples_per_second": 14.03,
52
- "eval_steps_per_second": 0.484,
53
  "step": 200
54
  },
55
  {
56
  "epoch": 11.428571428571429,
57
- "grad_norm": 3173.60546875,
58
  "learning_rate": 4.78e-06,
59
- "loss": 396.2762,
60
  "step": 240
61
  },
62
  {
63
  "epoch": 13.333333333333334,
64
- "grad_norm": 957.3853149414062,
65
  "learning_rate": 5.580000000000001e-06,
66
- "loss": 317.4443,
67
  "step": 280
68
  },
69
  {
70
  "epoch": 15.238095238095237,
71
- "grad_norm": 2215.865966796875,
72
  "learning_rate": 6.380000000000001e-06,
73
- "loss": 244.5671,
74
  "step": 320
75
  },
76
  {
77
  "epoch": 17.142857142857142,
78
- "grad_norm": 494.6834716796875,
79
  "learning_rate": 7.180000000000001e-06,
80
- "loss": 200.2045,
81
  "step": 360
82
  },
83
  {
84
  "epoch": 19.047619047619047,
85
- "grad_norm": 389.99395751953125,
86
  "learning_rate": 7.980000000000002e-06,
87
- "loss": 143.4672,
88
  "step": 400
89
  },
90
  {
91
  "epoch": 19.047619047619047,
92
- "eval_loss": 212.24693298339844,
93
- "eval_runtime": 7.6261,
94
- "eval_samples_per_second": 15.211,
95
- "eval_steps_per_second": 0.525,
96
  "step": 400
97
  },
98
  {
99
  "epoch": 20.952380952380953,
100
- "grad_norm": 296.1668395996094,
101
  "learning_rate": 8.78e-06,
102
- "loss": 108.6625,
103
  "step": 440
104
  },
105
  {
106
  "epoch": 22.857142857142858,
107
- "grad_norm": 270.3917541503906,
108
  "learning_rate": 9.58e-06,
109
- "loss": 86.1836,
110
  "step": 480
111
  },
112
  {
113
  "epoch": 24.761904761904763,
114
- "grad_norm": 252.61672973632812,
115
  "learning_rate": 9.993604266900276e-06,
116
- "loss": 66.8246,
117
  "step": 520
118
  },
119
  {
120
  "epoch": 26.666666666666668,
121
- "grad_norm": 176.43890380859375,
122
  "learning_rate": 9.938441702975689e-06,
123
- "loss": 52.6602,
124
  "step": 560
125
  },
126
  {
127
  "epoch": 28.571428571428573,
128
- "grad_norm": 143.43080139160156,
129
  "learning_rate": 9.827324621255206e-06,
130
- "loss": 45.2324,
131
  "step": 600
132
  },
133
  {
134
  "epoch": 28.571428571428573,
135
- "eval_loss": 131.0333709716797,
136
- "eval_runtime": 7.5366,
137
- "eval_samples_per_second": 15.392,
138
- "eval_steps_per_second": 0.531,
139
  "step": 600
140
  },
141
  {
142
  "epoch": 30.476190476190474,
143
- "grad_norm": 125.09796142578125,
144
  "learning_rate": 9.661512022674852e-06,
145
- "loss": 36.3571,
146
  "step": 640
147
  },
148
  {
149
  "epoch": 32.38095238095238,
150
- "grad_norm": 130.8366241455078,
151
  "learning_rate": 9.4428826302319e-06,
152
- "loss": 30.4001,
153
  "step": 680
154
  },
155
  {
156
  "epoch": 34.285714285714285,
157
- "grad_norm": 98.4572525024414,
158
  "learning_rate": 9.173913602302594e-06,
159
- "loss": 27.7692,
160
  "step": 720
161
  },
162
  {
163
  "epoch": 36.19047619047619,
164
- "grad_norm": 118.89614868164062,
165
  "learning_rate": 8.857652465447947e-06,
166
- "loss": 24.9675,
167
  "step": 760
168
  },
169
  {
170
  "epoch": 38.095238095238095,
171
- "grad_norm": 89.57968139648438,
172
  "learning_rate": 8.497682584720065e-06,
173
- "loss": 24.3789,
174
  "step": 800
175
  },
176
  {
177
  "epoch": 38.095238095238095,
178
- "eval_loss": 80.99005126953125,
179
- "eval_runtime": 7.2774,
180
- "eval_samples_per_second": 15.94,
181
- "eval_steps_per_second": 0.55,
182
  "step": 800
183
  },
184
  {
185
  "epoch": 40.0,
186
- "grad_norm": 132.4896240234375,
187
  "learning_rate": 8.098082562704253e-06,
188
- "loss": 20.1034,
189
  "step": 840
190
  },
191
  {
192
  "epoch": 41.904761904761905,
193
- "grad_norm": 81.84835815429688,
194
  "learning_rate": 7.663380027322022e-06,
195
- "loss": 18.1971,
196
  "step": 880
197
  },
198
  {
199
  "epoch": 43.80952380952381,
200
- "grad_norm": 55.926551818847656,
201
  "learning_rate": 7.1985003319975696e-06,
202
- "loss": 17.9396,
203
  "step": 920
204
  },
205
  {
206
  "epoch": 45.714285714285715,
207
- "grad_norm": 64.777587890625,
208
  "learning_rate": 6.708710749435427e-06,
209
- "loss": 15.3426,
210
  "step": 960
211
  },
212
  {
213
  "epoch": 47.61904761904762,
214
- "grad_norm": 53.17697525024414,
215
  "learning_rate": 6.199560791316009e-06,
216
- "loss": 16.3762,
217
  "step": 1000
218
  },
219
  {
220
  "epoch": 47.61904761904762,
221
- "eval_loss": 65.57749938964844,
222
- "eval_runtime": 7.054,
223
- "eval_samples_per_second": 16.445,
224
- "eval_steps_per_second": 0.567,
225
  "step": 1000
226
  },
227
  {
228
  "epoch": 49.523809523809526,
229
- "grad_norm": 140.15521240234375,
230
  "learning_rate": 5.676819330110808e-06,
231
- "loss": 13.8692,
232
  "step": 1040
233
  },
234
  {
235
  "epoch": 51.42857142857143,
236
- "grad_norm": 61.619049072265625,
237
  "learning_rate": 5.14640923545217e-06,
238
- "loss": 15.6367,
239
  "step": 1080
240
  },
241
  {
242
  "epoch": 53.333333333333336,
243
- "grad_norm": 147.7701873779297,
244
  "learning_rate": 4.614340265653695e-06,
245
- "loss": 13.0418,
246
  "step": 1120
247
  },
248
  {
249
  "epoch": 55.23809523809524,
250
- "grad_norm": 77.45187377929688,
251
  "learning_rate": 4.086640974747176e-06,
252
- "loss": 12.7083,
253
  "step": 1160
254
  },
255
  {
256
  "epoch": 57.142857142857146,
257
- "grad_norm": 215.86756896972656,
258
  "learning_rate": 3.569290406556519e-06,
259
- "loss": 13.65,
260
  "step": 1200
261
  },
262
  {
263
  "epoch": 57.142857142857146,
264
- "eval_loss": 58.122344970703125,
265
- "eval_runtime": 7.0277,
266
- "eval_samples_per_second": 16.506,
267
- "eval_steps_per_second": 0.569,
268
  "step": 1200
269
  },
270
  {
271
  "epoch": 59.04761904761905,
272
- "grad_norm": 76.79309844970703,
273
  "learning_rate": 3.068150349742125e-06,
274
- "loss": 11.9538,
275
  "step": 1240
276
  },
277
  {
278
  "epoch": 60.95238095238095,
279
- "grad_norm": 52.94865417480469,
280
  "learning_rate": 2.5888989213931047e-06,
281
- "loss": 11.5392,
282
  "step": 1280
283
  },
284
  {
285
  "epoch": 62.857142857142854,
286
- "grad_norm": 153.39662170410156,
287
  "learning_rate": 2.1369662316918095e-06,
288
- "loss": 12.1913,
289
  "step": 1320
290
  },
291
  {
292
  "epoch": 64.76190476190476,
293
- "grad_norm": 139.7332305908203,
294
  "learning_rate": 1.717472858595694e-06,
295
- "loss": 11.2592,
296
  "step": 1360
297
  },
298
  {
299
  "epoch": 66.66666666666667,
300
- "grad_norm": 39.22611999511719,
301
  "learning_rate": 1.335171829642984e-06,
302
- "loss": 11.1986,
303
  "step": 1400
304
  },
305
  {
306
  "epoch": 66.66666666666667,
307
- "eval_loss": 60.4874382019043,
308
- "eval_runtime": 7.0545,
309
- "eval_samples_per_second": 16.443,
310
- "eval_steps_per_second": 0.567,
311
  "step": 1400
312
  },
313
  {
314
  "epoch": 68.57142857142857,
315
- "grad_norm": 80.5909423828125,
316
  "learning_rate": 9.943947682514538e-07,
317
- "loss": 11.4891,
318
  "step": 1440
319
  },
320
  {
321
  "epoch": 70.47619047619048,
322
- "grad_norm": 92.74817657470703,
323
  "learning_rate": 6.990028146942752e-07,
324
- "loss": 11.4975,
325
  "step": 1480
326
  },
327
  {
328
  "epoch": 72.38095238095238,
329
- "grad_norm": 58.092369079589844,
330
  "learning_rate": 4.523428778378852e-07,
331
- "loss": 11.2603,
332
  "step": 1520
333
  },
334
  {
335
  "epoch": 74.28571428571429,
336
- "grad_norm": 171.13815307617188,
337
  "learning_rate": 2.5720971332717783e-07,
338
- "loss": 10.5585,
339
  "step": 1560
340
  },
341
  {
342
  "epoch": 76.19047619047619,
343
- "grad_norm": 49.394981384277344,
344
  "learning_rate": 1.158142578873156e-07,
345
- "loss": 10.9979,
346
  "step": 1600
347
  },
348
  {
349
  "epoch": 76.19047619047619,
350
- "eval_loss": 56.9376335144043,
351
- "eval_runtime": 7.1981,
352
- "eval_samples_per_second": 16.115,
353
- "eval_steps_per_second": 0.556,
354
  "step": 1600
355
  }
356
  ],
 
1
  {
2
  "best_global_step": 1600,
3
+ "best_metric": 62.760154724121094,
4
  "best_model_checkpoint": "rtdetr_voucher_model/checkpoint-1600",
5
  "epoch": 76.19047619047619,
6
  "eval_steps": 200,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.9047619047619047,
14
+ "grad_norm": 23553.73828125,
15
  "learning_rate": 7.8e-07,
16
+ "loss": 1390.8067,
17
  "step": 40
18
  },
19
  {
20
  "epoch": 3.8095238095238093,
21
+ "grad_norm": 7427.158203125,
22
  "learning_rate": 1.5800000000000001e-06,
23
+ "loss": 1273.2542,
24
  "step": 80
25
  },
26
  {
27
  "epoch": 5.714285714285714,
28
+ "grad_norm": 6636.22314453125,
29
  "learning_rate": 2.38e-06,
30
+ "loss": 1059.9522,
31
  "step": 120
32
  },
33
  {
34
  "epoch": 7.619047619047619,
35
+ "grad_norm": 1760.4281005859375,
36
  "learning_rate": 3.1800000000000005e-06,
37
+ "loss": 790.0425,
38
  "step": 160
39
  },
40
  {
41
  "epoch": 9.523809523809524,
42
+ "grad_norm": 1625.7744140625,
43
  "learning_rate": 3.980000000000001e-06,
44
+ "loss": 517.1978,
45
  "step": 200
46
  },
47
  {
48
  "epoch": 9.523809523809524,
49
+ "eval_loss": 535.0709838867188,
50
+ "eval_runtime": 8.3595,
51
+ "eval_samples_per_second": 13.876,
52
+ "eval_steps_per_second": 0.478,
53
  "step": 200
54
  },
55
  {
56
  "epoch": 11.428571428571429,
57
+ "grad_norm": 1354.9310302734375,
58
  "learning_rate": 4.78e-06,
59
+ "loss": 353.8703,
60
  "step": 240
61
  },
62
  {
63
  "epoch": 13.333333333333334,
64
+ "grad_norm": 873.0985107421875,
65
  "learning_rate": 5.580000000000001e-06,
66
+ "loss": 262.4233,
67
  "step": 280
68
  },
69
  {
70
  "epoch": 15.238095238095237,
71
+ "grad_norm": 1516.567626953125,
72
  "learning_rate": 6.380000000000001e-06,
73
+ "loss": 205.628,
74
  "step": 320
75
  },
76
  {
77
  "epoch": 17.142857142857142,
78
+ "grad_norm": 5139.3779296875,
79
  "learning_rate": 7.180000000000001e-06,
80
+ "loss": 174.0845,
81
  "step": 360
82
  },
83
  {
84
  "epoch": 19.047619047619047,
85
+ "grad_norm": 480.6575622558594,
86
  "learning_rate": 7.980000000000002e-06,
87
+ "loss": 142.4243,
88
  "step": 400
89
  },
90
  {
91
  "epoch": 19.047619047619047,
92
+ "eval_loss": 214.48715209960938,
93
+ "eval_runtime": 7.9717,
94
+ "eval_samples_per_second": 14.551,
95
+ "eval_steps_per_second": 0.502,
96
  "step": 400
97
  },
98
  {
99
  "epoch": 20.952380952380953,
100
+ "grad_norm": 366.5032958984375,
101
  "learning_rate": 8.78e-06,
102
+ "loss": 115.4496,
103
  "step": 440
104
  },
105
  {
106
  "epoch": 22.857142857142858,
107
+ "grad_norm": 435.5931701660156,
108
  "learning_rate": 9.58e-06,
109
+ "loss": 92.9752,
110
  "step": 480
111
  },
112
  {
113
  "epoch": 24.761904761904763,
114
+ "grad_norm": 232.87234497070312,
115
  "learning_rate": 9.993604266900276e-06,
116
+ "loss": 78.7805,
117
  "step": 520
118
  },
119
  {
120
  "epoch": 26.666666666666668,
121
+ "grad_norm": 188.52627563476562,
122
  "learning_rate": 9.938441702975689e-06,
123
+ "loss": 59.0797,
124
  "step": 560
125
  },
126
  {
127
  "epoch": 28.571428571428573,
128
+ "grad_norm": 177.4958038330078,
129
  "learning_rate": 9.827324621255206e-06,
130
+ "loss": 46.9344,
131
  "step": 600
132
  },
133
  {
134
  "epoch": 28.571428571428573,
135
+ "eval_loss": 129.46548461914062,
136
+ "eval_runtime": 7.2365,
137
+ "eval_samples_per_second": 16.03,
138
+ "eval_steps_per_second": 0.553,
139
  "step": 600
140
  },
141
  {
142
  "epoch": 30.476190476190474,
143
+ "grad_norm": 135.09072875976562,
144
  "learning_rate": 9.661512022674852e-06,
145
+ "loss": 39.1039,
146
  "step": 640
147
  },
148
  {
149
  "epoch": 32.38095238095238,
150
+ "grad_norm": 128.56063842773438,
151
  "learning_rate": 9.4428826302319e-06,
152
+ "loss": 32.5044,
153
  "step": 680
154
  },
155
  {
156
  "epoch": 34.285714285714285,
157
+ "grad_norm": 99.94125366210938,
158
  "learning_rate": 9.173913602302594e-06,
159
+ "loss": 27.9798,
160
  "step": 720
161
  },
162
  {
163
  "epoch": 36.19047619047619,
164
+ "grad_norm": 128.35171508789062,
165
  "learning_rate": 8.857652465447947e-06,
166
+ "loss": 24.9475,
167
  "step": 760
168
  },
169
  {
170
  "epoch": 38.095238095238095,
171
+ "grad_norm": 113.99468231201172,
172
  "learning_rate": 8.497682584720065e-06,
173
+ "loss": 22.38,
174
  "step": 800
175
  },
176
  {
177
  "epoch": 38.095238095238095,
178
+ "eval_loss": 75.64146423339844,
179
+ "eval_runtime": 7.294,
180
+ "eval_samples_per_second": 15.904,
181
+ "eval_steps_per_second": 0.548,
182
  "step": 800
183
  },
184
  {
185
  "epoch": 40.0,
186
+ "grad_norm": 190.8120574951172,
187
  "learning_rate": 8.098082562704253e-06,
188
+ "loss": 19.868,
189
  "step": 840
190
  },
191
  {
192
  "epoch": 41.904761904761905,
193
+ "grad_norm": 58.219482421875,
194
  "learning_rate": 7.663380027322022e-06,
195
+ "loss": 22.0343,
196
  "step": 880
197
  },
198
  {
199
  "epoch": 43.80952380952381,
200
+ "grad_norm": 89.1216049194336,
201
  "learning_rate": 7.1985003319975696e-06,
202
+ "loss": 16.9501,
203
  "step": 920
204
  },
205
  {
206
  "epoch": 45.714285714285715,
207
+ "grad_norm": 147.2601776123047,
208
  "learning_rate": 6.708710749435427e-06,
209
+ "loss": 15.9704,
210
  "step": 960
211
  },
212
  {
213
  "epoch": 47.61904761904762,
214
+ "grad_norm": 103.56001281738281,
215
  "learning_rate": 6.199560791316009e-06,
216
+ "loss": 15.38,
217
  "step": 1000
218
  },
219
  {
220
  "epoch": 47.61904761904762,
221
+ "eval_loss": 79.28771209716797,
222
+ "eval_runtime": 7.3262,
223
+ "eval_samples_per_second": 15.834,
224
+ "eval_steps_per_second": 0.546,
225
  "step": 1000
226
  },
227
  {
228
  "epoch": 49.523809523809526,
229
+ "grad_norm": 49.25249099731445,
230
  "learning_rate": 5.676819330110808e-06,
231
+ "loss": 14.5406,
232
  "step": 1040
233
  },
234
  {
235
  "epoch": 51.42857142857143,
236
+ "grad_norm": 116.98213195800781,
237
  "learning_rate": 5.14640923545217e-06,
238
+ "loss": 14.0173,
239
  "step": 1080
240
  },
241
  {
242
  "epoch": 53.333333333333336,
243
+ "grad_norm": 72.93340301513672,
244
  "learning_rate": 4.614340265653695e-06,
245
+ "loss": 13.7052,
246
  "step": 1120
247
  },
248
  {
249
  "epoch": 55.23809523809524,
250
+ "grad_norm": 105.87384796142578,
251
  "learning_rate": 4.086640974747176e-06,
252
+ "loss": 15.4002,
253
  "step": 1160
254
  },
255
  {
256
  "epoch": 57.142857142857146,
257
+ "grad_norm": 59.23909378051758,
258
  "learning_rate": 3.569290406556519e-06,
259
+ "loss": 14.1171,
260
  "step": 1200
261
  },
262
  {
263
  "epoch": 57.142857142857146,
264
+ "eval_loss": 64.76508331298828,
265
+ "eval_runtime": 7.1521,
266
+ "eval_samples_per_second": 16.219,
267
+ "eval_steps_per_second": 0.559,
268
  "step": 1200
269
  },
270
  {
271
  "epoch": 59.04761904761905,
272
+ "grad_norm": 98.89187622070312,
273
  "learning_rate": 3.068150349742125e-06,
274
+ "loss": 12.3597,
275
  "step": 1240
276
  },
277
  {
278
  "epoch": 60.95238095238095,
279
+ "grad_norm": 179.5811004638672,
280
  "learning_rate": 2.5888989213931047e-06,
281
+ "loss": 12.125,
282
  "step": 1280
283
  },
284
  {
285
  "epoch": 62.857142857142854,
286
+ "grad_norm": 41.74519729614258,
287
  "learning_rate": 2.1369662316918095e-06,
288
+ "loss": 11.7567,
289
  "step": 1320
290
  },
291
  {
292
  "epoch": 64.76190476190476,
293
+ "grad_norm": 60.97040557861328,
294
  "learning_rate": 1.717472858595694e-06,
295
+ "loss": 12.3442,
296
  "step": 1360
297
  },
298
  {
299
  "epoch": 66.66666666666667,
300
+ "grad_norm": 197.75689697265625,
301
  "learning_rate": 1.335171829642984e-06,
302
+ "loss": 11.2475,
303
  "step": 1400
304
  },
305
  {
306
  "epoch": 66.66666666666667,
307
+ "eval_loss": 67.15937805175781,
308
+ "eval_runtime": 7.2137,
309
+ "eval_samples_per_second": 16.08,
310
+ "eval_steps_per_second": 0.554,
311
  "step": 1400
312
  },
313
  {
314
  "epoch": 68.57142857142857,
315
+ "grad_norm": 44.10291290283203,
316
  "learning_rate": 9.943947682514538e-07,
317
+ "loss": 11.0752,
318
  "step": 1440
319
  },
320
  {
321
  "epoch": 70.47619047619048,
322
+ "grad_norm": 54.24775314331055,
323
  "learning_rate": 6.990028146942752e-07,
324
+ "loss": 10.9762,
325
  "step": 1480
326
  },
327
  {
328
  "epoch": 72.38095238095238,
329
+ "grad_norm": 41.610679626464844,
330
  "learning_rate": 4.523428778378852e-07,
331
+ "loss": 10.9561,
332
  "step": 1520
333
  },
334
  {
335
  "epoch": 74.28571428571429,
336
+ "grad_norm": 109.53748321533203,
337
  "learning_rate": 2.5720971332717783e-07,
338
+ "loss": 10.7585,
339
  "step": 1560
340
  },
341
  {
342
  "epoch": 76.19047619047619,
343
+ "grad_norm": 49.795623779296875,
344
  "learning_rate": 1.158142578873156e-07,
345
+ "loss": 11.5847,
346
  "step": 1600
347
  },
348
  {
349
  "epoch": 76.19047619047619,
350
+ "eval_loss": 62.760154724121094,
351
+ "eval_runtime": 7.2145,
352
+ "eval_samples_per_second": 16.079,
353
+ "eval_steps_per_second": 0.554,
354
  "step": 1600
355
  }
356
  ],
checkpoint-1600/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0e178d29d8bd1005e0ebacb559c58b57ef56dd83b9d009bde5540a5128a1d4a
3
  size 5777
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba4185c232729e9f26479512d5ae60057198316f129ed28036d583f86d686e96
3
  size 5777
checkpoint-1680/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e6958fd8fe84e6136b94d989b92cd00c3dfc3947d99e2b30a1a4201250d271a
3
  size 306699044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e88b3beaaa4ee5fccf6c71430bbd8100000f005578591d22ab5699386131e4bc
3
  size 306699044
checkpoint-1680/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a82a34a038f51ea8dedcb9407415208265fc33837121a7bed52250968ca50771
3
  size 611580898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:536085da2f334c9892cc0fd076775e648bdc3fc281e1b08ea6bcd728d51b725c
3
  size 611580898
checkpoint-1680/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 1600,
3
- "best_metric": 56.9376335144043,
4
  "best_model_checkpoint": "rtdetr_voucher_model/checkpoint-1600",
5
  "epoch": 80.0,
6
  "eval_steps": 200,
@@ -11,360 +11,360 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.9047619047619047,
14
- "grad_norm": 10800.818359375,
15
  "learning_rate": 7.8e-07,
16
- "loss": 1388.4377,
17
  "step": 40
18
  },
19
  {
20
  "epoch": 3.8095238095238093,
21
- "grad_norm": 21733.138671875,
22
  "learning_rate": 1.5800000000000001e-06,
23
- "loss": 1273.0859,
24
  "step": 80
25
  },
26
  {
27
  "epoch": 5.714285714285714,
28
- "grad_norm": 2858.446044921875,
29
  "learning_rate": 2.38e-06,
30
- "loss": 1063.7887,
31
  "step": 120
32
  },
33
  {
34
  "epoch": 7.619047619047619,
35
- "grad_norm": 4524.0263671875,
36
  "learning_rate": 3.1800000000000005e-06,
37
- "loss": 771.9299,
38
  "step": 160
39
  },
40
  {
41
  "epoch": 9.523809523809524,
42
- "grad_norm": 2449.484130859375,
43
  "learning_rate": 3.980000000000001e-06,
44
- "loss": 519.636,
45
  "step": 200
46
  },
47
  {
48
  "epoch": 9.523809523809524,
49
- "eval_loss": 561.4364624023438,
50
- "eval_runtime": 8.2677,
51
- "eval_samples_per_second": 14.03,
52
- "eval_steps_per_second": 0.484,
53
  "step": 200
54
  },
55
  {
56
  "epoch": 11.428571428571429,
57
- "grad_norm": 3173.60546875,
58
  "learning_rate": 4.78e-06,
59
- "loss": 396.2762,
60
  "step": 240
61
  },
62
  {
63
  "epoch": 13.333333333333334,
64
- "grad_norm": 957.3853149414062,
65
  "learning_rate": 5.580000000000001e-06,
66
- "loss": 317.4443,
67
  "step": 280
68
  },
69
  {
70
  "epoch": 15.238095238095237,
71
- "grad_norm": 2215.865966796875,
72
  "learning_rate": 6.380000000000001e-06,
73
- "loss": 244.5671,
74
  "step": 320
75
  },
76
  {
77
  "epoch": 17.142857142857142,
78
- "grad_norm": 494.6834716796875,
79
  "learning_rate": 7.180000000000001e-06,
80
- "loss": 200.2045,
81
  "step": 360
82
  },
83
  {
84
  "epoch": 19.047619047619047,
85
- "grad_norm": 389.99395751953125,
86
  "learning_rate": 7.980000000000002e-06,
87
- "loss": 143.4672,
88
  "step": 400
89
  },
90
  {
91
  "epoch": 19.047619047619047,
92
- "eval_loss": 212.24693298339844,
93
- "eval_runtime": 7.6261,
94
- "eval_samples_per_second": 15.211,
95
- "eval_steps_per_second": 0.525,
96
  "step": 400
97
  },
98
  {
99
  "epoch": 20.952380952380953,
100
- "grad_norm": 296.1668395996094,
101
  "learning_rate": 8.78e-06,
102
- "loss": 108.6625,
103
  "step": 440
104
  },
105
  {
106
  "epoch": 22.857142857142858,
107
- "grad_norm": 270.3917541503906,
108
  "learning_rate": 9.58e-06,
109
- "loss": 86.1836,
110
  "step": 480
111
  },
112
  {
113
  "epoch": 24.761904761904763,
114
- "grad_norm": 252.61672973632812,
115
  "learning_rate": 9.993604266900276e-06,
116
- "loss": 66.8246,
117
  "step": 520
118
  },
119
  {
120
  "epoch": 26.666666666666668,
121
- "grad_norm": 176.43890380859375,
122
  "learning_rate": 9.938441702975689e-06,
123
- "loss": 52.6602,
124
  "step": 560
125
  },
126
  {
127
  "epoch": 28.571428571428573,
128
- "grad_norm": 143.43080139160156,
129
  "learning_rate": 9.827324621255206e-06,
130
- "loss": 45.2324,
131
  "step": 600
132
  },
133
  {
134
  "epoch": 28.571428571428573,
135
- "eval_loss": 131.0333709716797,
136
- "eval_runtime": 7.5366,
137
- "eval_samples_per_second": 15.392,
138
- "eval_steps_per_second": 0.531,
139
  "step": 600
140
  },
141
  {
142
  "epoch": 30.476190476190474,
143
- "grad_norm": 125.09796142578125,
144
  "learning_rate": 9.661512022674852e-06,
145
- "loss": 36.3571,
146
  "step": 640
147
  },
148
  {
149
  "epoch": 32.38095238095238,
150
- "grad_norm": 130.8366241455078,
151
  "learning_rate": 9.4428826302319e-06,
152
- "loss": 30.4001,
153
  "step": 680
154
  },
155
  {
156
  "epoch": 34.285714285714285,
157
- "grad_norm": 98.4572525024414,
158
  "learning_rate": 9.173913602302594e-06,
159
- "loss": 27.7692,
160
  "step": 720
161
  },
162
  {
163
  "epoch": 36.19047619047619,
164
- "grad_norm": 118.89614868164062,
165
  "learning_rate": 8.857652465447947e-06,
166
- "loss": 24.9675,
167
  "step": 760
168
  },
169
  {
170
  "epoch": 38.095238095238095,
171
- "grad_norm": 89.57968139648438,
172
  "learning_rate": 8.497682584720065e-06,
173
- "loss": 24.3789,
174
  "step": 800
175
  },
176
  {
177
  "epoch": 38.095238095238095,
178
- "eval_loss": 80.99005126953125,
179
- "eval_runtime": 7.2774,
180
- "eval_samples_per_second": 15.94,
181
- "eval_steps_per_second": 0.55,
182
  "step": 800
183
  },
184
  {
185
  "epoch": 40.0,
186
- "grad_norm": 132.4896240234375,
187
  "learning_rate": 8.098082562704253e-06,
188
- "loss": 20.1034,
189
  "step": 840
190
  },
191
  {
192
  "epoch": 41.904761904761905,
193
- "grad_norm": 81.84835815429688,
194
  "learning_rate": 7.663380027322022e-06,
195
- "loss": 18.1971,
196
  "step": 880
197
  },
198
  {
199
  "epoch": 43.80952380952381,
200
- "grad_norm": 55.926551818847656,
201
  "learning_rate": 7.1985003319975696e-06,
202
- "loss": 17.9396,
203
  "step": 920
204
  },
205
  {
206
  "epoch": 45.714285714285715,
207
- "grad_norm": 64.777587890625,
208
  "learning_rate": 6.708710749435427e-06,
209
- "loss": 15.3426,
210
  "step": 960
211
  },
212
  {
213
  "epoch": 47.61904761904762,
214
- "grad_norm": 53.17697525024414,
215
  "learning_rate": 6.199560791316009e-06,
216
- "loss": 16.3762,
217
  "step": 1000
218
  },
219
  {
220
  "epoch": 47.61904761904762,
221
- "eval_loss": 65.57749938964844,
222
- "eval_runtime": 7.054,
223
- "eval_samples_per_second": 16.445,
224
- "eval_steps_per_second": 0.567,
225
  "step": 1000
226
  },
227
  {
228
  "epoch": 49.523809523809526,
229
- "grad_norm": 140.15521240234375,
230
  "learning_rate": 5.676819330110808e-06,
231
- "loss": 13.8692,
232
  "step": 1040
233
  },
234
  {
235
  "epoch": 51.42857142857143,
236
- "grad_norm": 61.619049072265625,
237
  "learning_rate": 5.14640923545217e-06,
238
- "loss": 15.6367,
239
  "step": 1080
240
  },
241
  {
242
  "epoch": 53.333333333333336,
243
- "grad_norm": 147.7701873779297,
244
  "learning_rate": 4.614340265653695e-06,
245
- "loss": 13.0418,
246
  "step": 1120
247
  },
248
  {
249
  "epoch": 55.23809523809524,
250
- "grad_norm": 77.45187377929688,
251
  "learning_rate": 4.086640974747176e-06,
252
- "loss": 12.7083,
253
  "step": 1160
254
  },
255
  {
256
  "epoch": 57.142857142857146,
257
- "grad_norm": 215.86756896972656,
258
  "learning_rate": 3.569290406556519e-06,
259
- "loss": 13.65,
260
  "step": 1200
261
  },
262
  {
263
  "epoch": 57.142857142857146,
264
- "eval_loss": 58.122344970703125,
265
- "eval_runtime": 7.0277,
266
- "eval_samples_per_second": 16.506,
267
- "eval_steps_per_second": 0.569,
268
  "step": 1200
269
  },
270
  {
271
  "epoch": 59.04761904761905,
272
- "grad_norm": 76.79309844970703,
273
  "learning_rate": 3.068150349742125e-06,
274
- "loss": 11.9538,
275
  "step": 1240
276
  },
277
  {
278
  "epoch": 60.95238095238095,
279
- "grad_norm": 52.94865417480469,
280
  "learning_rate": 2.5888989213931047e-06,
281
- "loss": 11.5392,
282
  "step": 1280
283
  },
284
  {
285
  "epoch": 62.857142857142854,
286
- "grad_norm": 153.39662170410156,
287
  "learning_rate": 2.1369662316918095e-06,
288
- "loss": 12.1913,
289
  "step": 1320
290
  },
291
  {
292
  "epoch": 64.76190476190476,
293
- "grad_norm": 139.7332305908203,
294
  "learning_rate": 1.717472858595694e-06,
295
- "loss": 11.2592,
296
  "step": 1360
297
  },
298
  {
299
  "epoch": 66.66666666666667,
300
- "grad_norm": 39.22611999511719,
301
  "learning_rate": 1.335171829642984e-06,
302
- "loss": 11.1986,
303
  "step": 1400
304
  },
305
  {
306
  "epoch": 66.66666666666667,
307
- "eval_loss": 60.4874382019043,
308
- "eval_runtime": 7.0545,
309
- "eval_samples_per_second": 16.443,
310
- "eval_steps_per_second": 0.567,
311
  "step": 1400
312
  },
313
  {
314
  "epoch": 68.57142857142857,
315
- "grad_norm": 80.5909423828125,
316
  "learning_rate": 9.943947682514538e-07,
317
- "loss": 11.4891,
318
  "step": 1440
319
  },
320
  {
321
  "epoch": 70.47619047619048,
322
- "grad_norm": 92.74817657470703,
323
  "learning_rate": 6.990028146942752e-07,
324
- "loss": 11.4975,
325
  "step": 1480
326
  },
327
  {
328
  "epoch": 72.38095238095238,
329
- "grad_norm": 58.092369079589844,
330
  "learning_rate": 4.523428778378852e-07,
331
- "loss": 11.2603,
332
  "step": 1520
333
  },
334
  {
335
  "epoch": 74.28571428571429,
336
- "grad_norm": 171.13815307617188,
337
  "learning_rate": 2.5720971332717783e-07,
338
- "loss": 10.5585,
339
  "step": 1560
340
  },
341
  {
342
  "epoch": 76.19047619047619,
343
- "grad_norm": 49.394981384277344,
344
  "learning_rate": 1.158142578873156e-07,
345
- "loss": 10.9979,
346
  "step": 1600
347
  },
348
  {
349
  "epoch": 76.19047619047619,
350
- "eval_loss": 56.9376335144043,
351
- "eval_runtime": 7.1981,
352
- "eval_samples_per_second": 16.115,
353
- "eval_steps_per_second": 0.556,
354
  "step": 1600
355
  },
356
  {
357
  "epoch": 78.0952380952381,
358
- "grad_norm": 55.77260971069336,
359
  "learning_rate": 2.9758578527189776e-08,
360
- "loss": 10.6299,
361
  "step": 1640
362
  },
363
  {
364
  "epoch": 80.0,
365
- "grad_norm": 59.74852752685547,
366
  "learning_rate": 1.7720480054994606e-11,
367
- "loss": 10.527,
368
  "step": 1680
369
  }
370
  ],
 
1
  {
2
  "best_global_step": 1600,
3
+ "best_metric": 62.760154724121094,
4
  "best_model_checkpoint": "rtdetr_voucher_model/checkpoint-1600",
5
  "epoch": 80.0,
6
  "eval_steps": 200,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.9047619047619047,
14
+ "grad_norm": 23553.73828125,
15
  "learning_rate": 7.8e-07,
16
+ "loss": 1390.8067,
17
  "step": 40
18
  },
19
  {
20
  "epoch": 3.8095238095238093,
21
+ "grad_norm": 7427.158203125,
22
  "learning_rate": 1.5800000000000001e-06,
23
+ "loss": 1273.2542,
24
  "step": 80
25
  },
26
  {
27
  "epoch": 5.714285714285714,
28
+ "grad_norm": 6636.22314453125,
29
  "learning_rate": 2.38e-06,
30
+ "loss": 1059.9522,
31
  "step": 120
32
  },
33
  {
34
  "epoch": 7.619047619047619,
35
+ "grad_norm": 1760.4281005859375,
36
  "learning_rate": 3.1800000000000005e-06,
37
+ "loss": 790.0425,
38
  "step": 160
39
  },
40
  {
41
  "epoch": 9.523809523809524,
42
+ "grad_norm": 1625.7744140625,
43
  "learning_rate": 3.980000000000001e-06,
44
+ "loss": 517.1978,
45
  "step": 200
46
  },
47
  {
48
  "epoch": 9.523809523809524,
49
+ "eval_loss": 535.0709838867188,
50
+ "eval_runtime": 8.3595,
51
+ "eval_samples_per_second": 13.876,
52
+ "eval_steps_per_second": 0.478,
53
  "step": 200
54
  },
55
  {
56
  "epoch": 11.428571428571429,
57
+ "grad_norm": 1354.9310302734375,
58
  "learning_rate": 4.78e-06,
59
+ "loss": 353.8703,
60
  "step": 240
61
  },
62
  {
63
  "epoch": 13.333333333333334,
64
+ "grad_norm": 873.0985107421875,
65
  "learning_rate": 5.580000000000001e-06,
66
+ "loss": 262.4233,
67
  "step": 280
68
  },
69
  {
70
  "epoch": 15.238095238095237,
71
+ "grad_norm": 1516.567626953125,
72
  "learning_rate": 6.380000000000001e-06,
73
+ "loss": 205.628,
74
  "step": 320
75
  },
76
  {
77
  "epoch": 17.142857142857142,
78
+ "grad_norm": 5139.3779296875,
79
  "learning_rate": 7.180000000000001e-06,
80
+ "loss": 174.0845,
81
  "step": 360
82
  },
83
  {
84
  "epoch": 19.047619047619047,
85
+ "grad_norm": 480.6575622558594,
86
  "learning_rate": 7.980000000000002e-06,
87
+ "loss": 142.4243,
88
  "step": 400
89
  },
90
  {
91
  "epoch": 19.047619047619047,
92
+ "eval_loss": 214.48715209960938,
93
+ "eval_runtime": 7.9717,
94
+ "eval_samples_per_second": 14.551,
95
+ "eval_steps_per_second": 0.502,
96
  "step": 400
97
  },
98
  {
99
  "epoch": 20.952380952380953,
100
+ "grad_norm": 366.5032958984375,
101
  "learning_rate": 8.78e-06,
102
+ "loss": 115.4496,
103
  "step": 440
104
  },
105
  {
106
  "epoch": 22.857142857142858,
107
+ "grad_norm": 435.5931701660156,
108
  "learning_rate": 9.58e-06,
109
+ "loss": 92.9752,
110
  "step": 480
111
  },
112
  {
113
  "epoch": 24.761904761904763,
114
+ "grad_norm": 232.87234497070312,
115
  "learning_rate": 9.993604266900276e-06,
116
+ "loss": 78.7805,
117
  "step": 520
118
  },
119
  {
120
  "epoch": 26.666666666666668,
121
+ "grad_norm": 188.52627563476562,
122
  "learning_rate": 9.938441702975689e-06,
123
+ "loss": 59.0797,
124
  "step": 560
125
  },
126
  {
127
  "epoch": 28.571428571428573,
128
+ "grad_norm": 177.4958038330078,
129
  "learning_rate": 9.827324621255206e-06,
130
+ "loss": 46.9344,
131
  "step": 600
132
  },
133
  {
134
  "epoch": 28.571428571428573,
135
+ "eval_loss": 129.46548461914062,
136
+ "eval_runtime": 7.2365,
137
+ "eval_samples_per_second": 16.03,
138
+ "eval_steps_per_second": 0.553,
139
  "step": 600
140
  },
141
  {
142
  "epoch": 30.476190476190474,
143
+ "grad_norm": 135.09072875976562,
144
  "learning_rate": 9.661512022674852e-06,
145
+ "loss": 39.1039,
146
  "step": 640
147
  },
148
  {
149
  "epoch": 32.38095238095238,
150
+ "grad_norm": 128.56063842773438,
151
  "learning_rate": 9.4428826302319e-06,
152
+ "loss": 32.5044,
153
  "step": 680
154
  },
155
  {
156
  "epoch": 34.285714285714285,
157
+ "grad_norm": 99.94125366210938,
158
  "learning_rate": 9.173913602302594e-06,
159
+ "loss": 27.9798,
160
  "step": 720
161
  },
162
  {
163
  "epoch": 36.19047619047619,
164
+ "grad_norm": 128.35171508789062,
165
  "learning_rate": 8.857652465447947e-06,
166
+ "loss": 24.9475,
167
  "step": 760
168
  },
169
  {
170
  "epoch": 38.095238095238095,
171
+ "grad_norm": 113.99468231201172,
172
  "learning_rate": 8.497682584720065e-06,
173
+ "loss": 22.38,
174
  "step": 800
175
  },
176
  {
177
  "epoch": 38.095238095238095,
178
+ "eval_loss": 75.64146423339844,
179
+ "eval_runtime": 7.294,
180
+ "eval_samples_per_second": 15.904,
181
+ "eval_steps_per_second": 0.548,
182
  "step": 800
183
  },
184
  {
185
  "epoch": 40.0,
186
+ "grad_norm": 190.8120574951172,
187
  "learning_rate": 8.098082562704253e-06,
188
+ "loss": 19.868,
189
  "step": 840
190
  },
191
  {
192
  "epoch": 41.904761904761905,
193
+ "grad_norm": 58.219482421875,
194
  "learning_rate": 7.663380027322022e-06,
195
+ "loss": 22.0343,
196
  "step": 880
197
  },
198
  {
199
  "epoch": 43.80952380952381,
200
+ "grad_norm": 89.1216049194336,
201
  "learning_rate": 7.1985003319975696e-06,
202
+ "loss": 16.9501,
203
  "step": 920
204
  },
205
  {
206
  "epoch": 45.714285714285715,
207
+ "grad_norm": 147.2601776123047,
208
  "learning_rate": 6.708710749435427e-06,
209
+ "loss": 15.9704,
210
  "step": 960
211
  },
212
  {
213
  "epoch": 47.61904761904762,
214
+ "grad_norm": 103.56001281738281,
215
  "learning_rate": 6.199560791316009e-06,
216
+ "loss": 15.38,
217
  "step": 1000
218
  },
219
  {
220
  "epoch": 47.61904761904762,
221
+ "eval_loss": 79.28771209716797,
222
+ "eval_runtime": 7.3262,
223
+ "eval_samples_per_second": 15.834,
224
+ "eval_steps_per_second": 0.546,
225
  "step": 1000
226
  },
227
  {
228
  "epoch": 49.523809523809526,
229
+ "grad_norm": 49.25249099731445,
230
  "learning_rate": 5.676819330110808e-06,
231
+ "loss": 14.5406,
232
  "step": 1040
233
  },
234
  {
235
  "epoch": 51.42857142857143,
236
+ "grad_norm": 116.98213195800781,
237
  "learning_rate": 5.14640923545217e-06,
238
+ "loss": 14.0173,
239
  "step": 1080
240
  },
241
  {
242
  "epoch": 53.333333333333336,
243
+ "grad_norm": 72.93340301513672,
244
  "learning_rate": 4.614340265653695e-06,
245
+ "loss": 13.7052,
246
  "step": 1120
247
  },
248
  {
249
  "epoch": 55.23809523809524,
250
+ "grad_norm": 105.87384796142578,
251
  "learning_rate": 4.086640974747176e-06,
252
+ "loss": 15.4002,
253
  "step": 1160
254
  },
255
  {
256
  "epoch": 57.142857142857146,
257
+ "grad_norm": 59.23909378051758,
258
  "learning_rate": 3.569290406556519e-06,
259
+ "loss": 14.1171,
260
  "step": 1200
261
  },
262
  {
263
  "epoch": 57.142857142857146,
264
+ "eval_loss": 64.76508331298828,
265
+ "eval_runtime": 7.1521,
266
+ "eval_samples_per_second": 16.219,
267
+ "eval_steps_per_second": 0.559,
268
  "step": 1200
269
  },
270
  {
271
  "epoch": 59.04761904761905,
272
+ "grad_norm": 98.89187622070312,
273
  "learning_rate": 3.068150349742125e-06,
274
+ "loss": 12.3597,
275
  "step": 1240
276
  },
277
  {
278
  "epoch": 60.95238095238095,
279
+ "grad_norm": 179.5811004638672,
280
  "learning_rate": 2.5888989213931047e-06,
281
+ "loss": 12.125,
282
  "step": 1280
283
  },
284
  {
285
  "epoch": 62.857142857142854,
286
+ "grad_norm": 41.74519729614258,
287
  "learning_rate": 2.1369662316918095e-06,
288
+ "loss": 11.7567,
289
  "step": 1320
290
  },
291
  {
292
  "epoch": 64.76190476190476,
293
+ "grad_norm": 60.97040557861328,
294
  "learning_rate": 1.717472858595694e-06,
295
+ "loss": 12.3442,
296
  "step": 1360
297
  },
298
  {
299
  "epoch": 66.66666666666667,
300
+ "grad_norm": 197.75689697265625,
301
  "learning_rate": 1.335171829642984e-06,
302
+ "loss": 11.2475,
303
  "step": 1400
304
  },
305
  {
306
  "epoch": 66.66666666666667,
307
+ "eval_loss": 67.15937805175781,
308
+ "eval_runtime": 7.2137,
309
+ "eval_samples_per_second": 16.08,
310
+ "eval_steps_per_second": 0.554,
311
  "step": 1400
312
  },
313
  {
314
  "epoch": 68.57142857142857,
315
+ "grad_norm": 44.10291290283203,
316
  "learning_rate": 9.943947682514538e-07,
317
+ "loss": 11.0752,
318
  "step": 1440
319
  },
320
  {
321
  "epoch": 70.47619047619048,
322
+ "grad_norm": 54.24775314331055,
323
  "learning_rate": 6.990028146942752e-07,
324
+ "loss": 10.9762,
325
  "step": 1480
326
  },
327
  {
328
  "epoch": 72.38095238095238,
329
+ "grad_norm": 41.610679626464844,
330
  "learning_rate": 4.523428778378852e-07,
331
+ "loss": 10.9561,
332
  "step": 1520
333
  },
334
  {
335
  "epoch": 74.28571428571429,
336
+ "grad_norm": 109.53748321533203,
337
  "learning_rate": 2.5720971332717783e-07,
338
+ "loss": 10.7585,
339
  "step": 1560
340
  },
341
  {
342
  "epoch": 76.19047619047619,
343
+ "grad_norm": 49.795623779296875,
344
  "learning_rate": 1.158142578873156e-07,
345
+ "loss": 11.5847,
346
  "step": 1600
347
  },
348
  {
349
  "epoch": 76.19047619047619,
350
+ "eval_loss": 62.760154724121094,
351
+ "eval_runtime": 7.2145,
352
+ "eval_samples_per_second": 16.079,
353
+ "eval_steps_per_second": 0.554,
354
  "step": 1600
355
  },
356
  {
357
  "epoch": 78.0952380952381,
358
+ "grad_norm": 58.66199493408203,
359
  "learning_rate": 2.9758578527189776e-08,
360
+ "loss": 11.5135,
361
  "step": 1640
362
  },
363
  {
364
  "epoch": 80.0,
365
+ "grad_norm": 53.418094635009766,
366
  "learning_rate": 1.7720480054994606e-11,
367
+ "loss": 10.746,
368
  "step": 1680
369
  }
370
  ],
checkpoint-1680/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0e178d29d8bd1005e0ebacb559c58b57ef56dd83b9d009bde5540a5128a1d4a
3
  size 5777
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba4185c232729e9f26479512d5ae60057198316f129ed28036d583f86d686e96
3
  size 5777
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fc83e9ffb9854d6f50d18b71d3610cb9b411e48e241f6e96354f2fd615e20c4
3
  size 306699044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26f7b879ae25a9c83fa3e849fdb085366c9c812655437eeedbdf446b895a6cc5
3
  size 306699044
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0e178d29d8bd1005e0ebacb559c58b57ef56dd83b9d009bde5540a5128a1d4a
3
  size 5777
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba4185c232729e9f26479512d5ae60057198316f129ed28036d583f86d686e96
3
  size 5777