Training in progress, step 325800
Browse files- adapter_model.safetensors +1 -1
- last-checkpoint/adapter_config.json +4 -4
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3 -346
- last-checkpoint/training_args.bin +1 -1
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 778096664
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5a877bc1ced1c90857fb89b0d39b0f5b375a46377a51f59e1855144d87589c3
|
3 |
size 778096664
|
last-checkpoint/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
-
"up_proj",
|
27 |
-
"q_proj",
|
28 |
-
"gate_proj",
|
29 |
"k_proj",
|
|
|
30 |
"o_proj",
|
|
|
31 |
"v_proj",
|
32 |
-
"
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
|
|
|
|
|
|
26 |
"k_proj",
|
27 |
+
"q_proj",
|
28 |
"o_proj",
|
29 |
+
"down_proj",
|
30 |
"v_proj",
|
31 |
+
"gate_proj",
|
32 |
+
"up_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 778096664
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b2960647651f74a021854aa83d2f1e44e7edf833c93d30f3f0898a3b62998f8
|
3 |
size 778096664
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 396582274
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1edc103979e14bdcf4aecb105646466ee2930c03c3298174482e60391794d7b7
|
3 |
size 396582274
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f617a484c0b6df4d5c6eea4305a692f5553b5d1313d4e6a74ecee2702d9ed2da
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d177f6580f087efd416cbcb9dd0a3c522d49c6ad1fc91a3bc8e0586cfb577f5
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -11060,349 +11060,6 @@
|
|
11060 |
"learning_rate": 1.432751373441014e-05,
|
11061 |
"loss": 1.715,
|
11062 |
"step": 315800
|
11063 |
-
},
|
11064 |
-
{
|
11065 |
-
"epoch": 0.8790890856638931,
|
11066 |
-
"grad_norm": 2.656094551086426,
|
11067 |
-
"learning_rate": 1.4321506470902243e-05,
|
11068 |
-
"loss": 1.6757,
|
11069 |
-
"step": 316000
|
11070 |
-
},
|
11071 |
-
{
|
11072 |
-
"epoch": 0.8796454711611487,
|
11073 |
-
"grad_norm": 2.0576250553131104,
|
11074 |
-
"learning_rate": 1.4315498198000568e-05,
|
11075 |
-
"loss": 1.695,
|
11076 |
-
"step": 316200
|
11077 |
-
},
|
11078 |
-
{
|
11079 |
-
"epoch": 0.8802018566584043,
|
11080 |
-
"grad_norm": 1.9753005504608154,
|
11081 |
-
"learning_rate": 1.4309488920294356e-05,
|
11082 |
-
"loss": 1.7,
|
11083 |
-
"step": 316400
|
11084 |
-
},
|
11085 |
-
{
|
11086 |
-
"epoch": 0.88075824215566,
|
11087 |
-
"grad_norm": 1.6092278957366943,
|
11088 |
-
"learning_rate": 1.430347864237362e-05,
|
11089 |
-
"loss": 1.6588,
|
11090 |
-
"step": 316600
|
11091 |
-
},
|
11092 |
-
{
|
11093 |
-
"epoch": 0.8813146276529156,
|
11094 |
-
"grad_norm": 1.910270094871521,
|
11095 |
-
"learning_rate": 1.4297467368829136e-05,
|
11096 |
-
"loss": 1.6998,
|
11097 |
-
"step": 316800
|
11098 |
-
},
|
11099 |
-
{
|
11100 |
-
"epoch": 0.8818710131501712,
|
11101 |
-
"grad_norm": 3.1925556659698486,
|
11102 |
-
"learning_rate": 1.4291455104252431e-05,
|
11103 |
-
"loss": 1.6877,
|
11104 |
-
"step": 317000
|
11105 |
-
},
|
11106 |
-
{
|
11107 |
-
"epoch": 0.8824273986474268,
|
11108 |
-
"grad_norm": 2.2833807468414307,
|
11109 |
-
"learning_rate": 1.4285441853235804e-05,
|
11110 |
-
"loss": 1.6788,
|
11111 |
-
"step": 317200
|
11112 |
-
},
|
11113 |
-
{
|
11114 |
-
"epoch": 0.8829837841446825,
|
11115 |
-
"grad_norm": 2.9970617294311523,
|
11116 |
-
"learning_rate": 1.4279427620372295e-05,
|
11117 |
-
"loss": 1.6621,
|
11118 |
-
"step": 317400
|
11119 |
-
},
|
11120 |
-
{
|
11121 |
-
"epoch": 0.8835401696419382,
|
11122 |
-
"grad_norm": 3.0393621921539307,
|
11123 |
-
"learning_rate": 1.4273412410255701e-05,
|
11124 |
-
"loss": 1.69,
|
11125 |
-
"step": 317600
|
11126 |
-
},
|
11127 |
-
{
|
11128 |
-
"epoch": 0.8840965551391937,
|
11129 |
-
"grad_norm": 1.8881953954696655,
|
11130 |
-
"learning_rate": 1.4267396227480556e-05,
|
11131 |
-
"loss": 1.6958,
|
11132 |
-
"step": 317800
|
11133 |
-
},
|
11134 |
-
{
|
11135 |
-
"epoch": 0.8846529406364494,
|
11136 |
-
"grad_norm": 2.3096697330474854,
|
11137 |
-
"learning_rate": 1.4261379076642155e-05,
|
11138 |
-
"loss": 1.6904,
|
11139 |
-
"step": 318000
|
11140 |
-
},
|
11141 |
-
{
|
11142 |
-
"epoch": 0.885209326133705,
|
11143 |
-
"grad_norm": 2.055274486541748,
|
11144 |
-
"learning_rate": 1.4255360962336515e-05,
|
11145 |
-
"loss": 1.6806,
|
11146 |
-
"step": 318200
|
11147 |
-
},
|
11148 |
-
{
|
11149 |
-
"epoch": 0.8857657116309606,
|
11150 |
-
"grad_norm": 2.894618511199951,
|
11151 |
-
"learning_rate": 1.4249341889160393e-05,
|
11152 |
-
"loss": 1.691,
|
11153 |
-
"step": 318400
|
11154 |
-
},
|
11155 |
-
{
|
11156 |
-
"epoch": 0.8863220971282163,
|
11157 |
-
"grad_norm": 2.6434152126312256,
|
11158 |
-
"learning_rate": 1.4243321861711285e-05,
|
11159 |
-
"loss": 1.707,
|
11160 |
-
"step": 318600
|
11161 |
-
},
|
11162 |
-
{
|
11163 |
-
"epoch": 0.8868784826254719,
|
11164 |
-
"grad_norm": 1.936525821685791,
|
11165 |
-
"learning_rate": 1.423730088458741e-05,
|
11166 |
-
"loss": 1.7093,
|
11167 |
-
"step": 318800
|
11168 |
-
},
|
11169 |
-
{
|
11170 |
-
"epoch": 0.8874348681227275,
|
11171 |
-
"grad_norm": 1.9834107160568237,
|
11172 |
-
"learning_rate": 1.4231278962387715e-05,
|
11173 |
-
"loss": 1.713,
|
11174 |
-
"step": 319000
|
11175 |
-
},
|
11176 |
-
{
|
11177 |
-
"epoch": 0.8879912536199831,
|
11178 |
-
"grad_norm": 2.6799023151397705,
|
11179 |
-
"learning_rate": 1.4225256099711869e-05,
|
11180 |
-
"loss": 1.6735,
|
11181 |
-
"step": 319200
|
11182 |
-
},
|
11183 |
-
{
|
11184 |
-
"epoch": 0.8885476391172388,
|
11185 |
-
"grad_norm": 2.4906699657440186,
|
11186 |
-
"learning_rate": 1.4219232301160252e-05,
|
11187 |
-
"loss": 1.6612,
|
11188 |
-
"step": 319400
|
11189 |
-
},
|
11190 |
-
{
|
11191 |
-
"epoch": 0.8891040246144944,
|
11192 |
-
"grad_norm": 3.0069377422332764,
|
11193 |
-
"learning_rate": 1.4213207571333971e-05,
|
11194 |
-
"loss": 1.6597,
|
11195 |
-
"step": 319600
|
11196 |
-
},
|
11197 |
-
{
|
11198 |
-
"epoch": 0.88966041011175,
|
11199 |
-
"grad_norm": 2.4262261390686035,
|
11200 |
-
"learning_rate": 1.4207181914834838e-05,
|
11201 |
-
"loss": 1.6934,
|
11202 |
-
"step": 319800
|
11203 |
-
},
|
11204 |
-
{
|
11205 |
-
"epoch": 0.8902167956090057,
|
11206 |
-
"grad_norm": 1.9308116436004639,
|
11207 |
-
"learning_rate": 1.420115533626537e-05,
|
11208 |
-
"loss": 1.7004,
|
11209 |
-
"step": 320000
|
11210 |
-
},
|
11211 |
-
{
|
11212 |
-
"epoch": 0.8907731811062612,
|
11213 |
-
"grad_norm": 2.419318199157715,
|
11214 |
-
"learning_rate": 1.4195127840228795e-05,
|
11215 |
-
"loss": 1.6635,
|
11216 |
-
"step": 320200
|
11217 |
-
},
|
11218 |
-
{
|
11219 |
-
"epoch": 0.8913295666035169,
|
11220 |
-
"grad_norm": 2.2832937240600586,
|
11221 |
-
"learning_rate": 1.4189099431329036e-05,
|
11222 |
-
"loss": 1.6958,
|
11223 |
-
"step": 320400
|
11224 |
-
},
|
11225 |
-
{
|
11226 |
-
"epoch": 0.8918859521007726,
|
11227 |
-
"grad_norm": 2.273939609527588,
|
11228 |
-
"learning_rate": 1.4183070114170717e-05,
|
11229 |
-
"loss": 1.6445,
|
11230 |
-
"step": 320600
|
11231 |
-
},
|
11232 |
-
{
|
11233 |
-
"epoch": 0.8924423375980282,
|
11234 |
-
"grad_norm": 2.2276997566223145,
|
11235 |
-
"learning_rate": 1.4177039893359151e-05,
|
11236 |
-
"loss": 1.6742,
|
11237 |
-
"step": 320800
|
11238 |
-
},
|
11239 |
-
{
|
11240 |
-
"epoch": 0.8929987230952838,
|
11241 |
-
"grad_norm": 2.669675350189209,
|
11242 |
-
"learning_rate": 1.4171008773500346e-05,
|
11243 |
-
"loss": 1.7064,
|
11244 |
-
"step": 321000
|
11245 |
-
},
|
11246 |
-
{
|
11247 |
-
"epoch": 0.8935551085925394,
|
11248 |
-
"grad_norm": 1.8450345993041992,
|
11249 |
-
"learning_rate": 1.4164976759200998e-05,
|
11250 |
-
"loss": 1.6997,
|
11251 |
-
"step": 321200
|
11252 |
-
},
|
11253 |
-
{
|
11254 |
-
"epoch": 0.8941114940897951,
|
11255 |
-
"grad_norm": 2.5424864292144775,
|
11256 |
-
"learning_rate": 1.4158943855068478e-05,
|
11257 |
-
"loss": 1.6985,
|
11258 |
-
"step": 321400
|
11259 |
-
},
|
11260 |
-
{
|
11261 |
-
"epoch": 0.8946678795870506,
|
11262 |
-
"grad_norm": 2.899928569793701,
|
11263 |
-
"learning_rate": 1.415291006571085e-05,
|
11264 |
-
"loss": 1.688,
|
11265 |
-
"step": 321600
|
11266 |
-
},
|
11267 |
-
{
|
11268 |
-
"epoch": 0.8952242650843063,
|
11269 |
-
"grad_norm": 2.4306631088256836,
|
11270 |
-
"learning_rate": 1.4146875395736839e-05,
|
11271 |
-
"loss": 1.7061,
|
11272 |
-
"step": 321800
|
11273 |
-
},
|
11274 |
-
{
|
11275 |
-
"epoch": 0.895780650581562,
|
11276 |
-
"grad_norm": 3.6513545513153076,
|
11277 |
-
"learning_rate": 1.4140839849755852e-05,
|
11278 |
-
"loss": 1.7021,
|
11279 |
-
"step": 322000
|
11280 |
-
},
|
11281 |
-
{
|
11282 |
-
"epoch": 0.8963370360788175,
|
11283 |
-
"grad_norm": 2.4351377487182617,
|
11284 |
-
"learning_rate": 1.4134803432377963e-05,
|
11285 |
-
"loss": 1.6831,
|
11286 |
-
"step": 322200
|
11287 |
-
},
|
11288 |
-
{
|
11289 |
-
"epoch": 0.8968934215760732,
|
11290 |
-
"grad_norm": 2.7318243980407715,
|
11291 |
-
"learning_rate": 1.4128766148213911e-05,
|
11292 |
-
"loss": 1.7039,
|
11293 |
-
"step": 322400
|
11294 |
-
},
|
11295 |
-
{
|
11296 |
-
"epoch": 0.8974498070733288,
|
11297 |
-
"grad_norm": 2.675609827041626,
|
11298 |
-
"learning_rate": 1.4122728001875102e-05,
|
11299 |
-
"loss": 1.6713,
|
11300 |
-
"step": 322600
|
11301 |
-
},
|
11302 |
-
{
|
11303 |
-
"epoch": 0.8980061925705844,
|
11304 |
-
"grad_norm": 2.1647613048553467,
|
11305 |
-
"learning_rate": 1.4116688997973589e-05,
|
11306 |
-
"loss": 1.678,
|
11307 |
-
"step": 322800
|
11308 |
-
},
|
11309 |
-
{
|
11310 |
-
"epoch": 0.8985625780678401,
|
11311 |
-
"grad_norm": 2.230194330215454,
|
11312 |
-
"learning_rate": 1.411064914112209e-05,
|
11313 |
-
"loss": 1.6854,
|
11314 |
-
"step": 323000
|
11315 |
-
},
|
11316 |
-
{
|
11317 |
-
"epoch": 0.8991189635650957,
|
11318 |
-
"grad_norm": 2.459803819656372,
|
11319 |
-
"learning_rate": 1.4104608435933977e-05,
|
11320 |
-
"loss": 1.6688,
|
11321 |
-
"step": 323200
|
11322 |
-
},
|
11323 |
-
{
|
11324 |
-
"epoch": 0.8996753490623514,
|
11325 |
-
"grad_norm": 2.098407030105591,
|
11326 |
-
"learning_rate": 1.4098566887023254e-05,
|
11327 |
-
"loss": 1.6779,
|
11328 |
-
"step": 323400
|
11329 |
-
},
|
11330 |
-
{
|
11331 |
-
"epoch": 0.9002317345596069,
|
11332 |
-
"grad_norm": 2.0758612155914307,
|
11333 |
-
"learning_rate": 1.4092524499004592e-05,
|
11334 |
-
"loss": 1.6629,
|
11335 |
-
"step": 323600
|
11336 |
-
},
|
11337 |
-
{
|
11338 |
-
"epoch": 0.9007881200568626,
|
11339 |
-
"grad_norm": 4.269808769226074,
|
11340 |
-
"learning_rate": 1.4086481276493285e-05,
|
11341 |
-
"loss": 1.6618,
|
11342 |
-
"step": 323800
|
11343 |
-
},
|
11344 |
-
{
|
11345 |
-
"epoch": 0.9013445055541183,
|
11346 |
-
"grad_norm": 2.4955685138702393,
|
11347 |
-
"learning_rate": 1.4080437224105268e-05,
|
11348 |
-
"loss": 1.6878,
|
11349 |
-
"step": 324000
|
11350 |
-
},
|
11351 |
-
{
|
11352 |
-
"epoch": 0.9019008910513738,
|
11353 |
-
"grad_norm": 1.8088123798370361,
|
11354 |
-
"learning_rate": 1.4074392346457116e-05,
|
11355 |
-
"loss": 1.6641,
|
11356 |
-
"step": 324200
|
11357 |
-
},
|
11358 |
-
{
|
11359 |
-
"epoch": 0.9024572765486295,
|
11360 |
-
"grad_norm": 4.655933380126953,
|
11361 |
-
"learning_rate": 1.4068346648166026e-05,
|
11362 |
-
"loss": 1.6958,
|
11363 |
-
"step": 324400
|
11364 |
-
},
|
11365 |
-
{
|
11366 |
-
"epoch": 0.9030136620458851,
|
11367 |
-
"grad_norm": 2.517047643661499,
|
11368 |
-
"learning_rate": 1.4062300133849833e-05,
|
11369 |
-
"loss": 1.6641,
|
11370 |
-
"step": 324600
|
11371 |
-
},
|
11372 |
-
{
|
11373 |
-
"epoch": 0.9035700475431407,
|
11374 |
-
"grad_norm": 2.3092081546783447,
|
11375 |
-
"learning_rate": 1.4056252808126984e-05,
|
11376 |
-
"loss": 1.7042,
|
11377 |
-
"step": 324800
|
11378 |
-
},
|
11379 |
-
{
|
11380 |
-
"epoch": 0.9041264330403964,
|
11381 |
-
"grad_norm": 1.8197965621948242,
|
11382 |
-
"learning_rate": 1.4050204675616556e-05,
|
11383 |
-
"loss": 1.6906,
|
11384 |
-
"step": 325000
|
11385 |
-
},
|
11386 |
-
{
|
11387 |
-
"epoch": 0.904682818537652,
|
11388 |
-
"grad_norm": 2.1050660610198975,
|
11389 |
-
"learning_rate": 1.4044155740938232e-05,
|
11390 |
-
"loss": 1.667,
|
11391 |
-
"step": 325200
|
11392 |
-
},
|
11393 |
-
{
|
11394 |
-
"epoch": 0.9052392040349077,
|
11395 |
-
"grad_norm": 2.001034736633301,
|
11396 |
-
"learning_rate": 1.4038106008712313e-05,
|
11397 |
-
"loss": 1.6856,
|
11398 |
-
"step": 325400
|
11399 |
-
},
|
11400 |
-
{
|
11401 |
-
"epoch": 0.9057955895321632,
|
11402 |
-
"grad_norm": 3.069322109222412,
|
11403 |
-
"learning_rate": 1.403205548355971e-05,
|
11404 |
-
"loss": 1.6637,
|
11405 |
-
"step": 325600
|
11406 |
}
|
11407 |
],
|
11408 |
"logging_steps": 200,
|
@@ -11422,7 +11079,7 @@
|
|
11422 |
"attributes": {}
|
11423 |
}
|
11424 |
},
|
11425 |
-
"total_flos": 6.
|
11426 |
"train_batch_size": 4,
|
11427 |
"trial_name": null,
|
11428 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8785327001666374,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 315800,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
11060 |
"learning_rate": 1.432751373441014e-05,
|
11061 |
"loss": 1.715,
|
11062 |
"step": 315800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11063 |
}
|
11064 |
],
|
11065 |
"logging_steps": 200,
|
|
|
11079 |
"attributes": {}
|
11080 |
}
|
11081 |
},
|
11082 |
+
"total_flos": 6.739425054956544e+18,
|
11083 |
"train_batch_size": 4,
|
11084 |
"trial_name": null,
|
11085 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73f1bca4a88b95f7d26d58c9cb2c2e4f7a79a6873effa8be0378f7f0ebfb8ceb
|
3 |
size 6840
|