Training in progress, epoch 1, checkpoint
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +445 -4
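The directory follows the standard Trainer checkpoint layout: adapter weights, optimizer and scheduler state, one RNG state file per process, and the trainer state JSON. As a minimal sketch (not this repo's actual code, and assuming a causal-LM PEFT/LoRA adapter), the saved adapter can be attached to its base model like this; "base-model-name" is a placeholder, the real base model is whatever the checkpoint's adapter_config.json points to:

# Minimal sketch, assumptions noted above; not taken from this commit.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("base-model-name")  # placeholder id
model = PeftModel.from_pretrained(base, "last-checkpoint")      # loads adapter_model.safetensors

# To continue the run instead, Trainer can restore everything below
# (optimizer.pt, scheduler.pt, rng_state_*.pth, trainer_state.json) via
# trainer.train(resume_from_checkpoint="last-checkpoint").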
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d5e26503ee5864af07ce4030836db51a4668d97e466902ce26562a2ae8f4dbd1
 size 201892112
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d7f87dfd3983905fc6f45e08cd5447e3695907754f080349935e996e48fe7470
 size 102864868
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ba9dafcc54ce9ff084db79366d7b940d91787c2cfb616ba91a4bb22cd048faed
 size 15984
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6f5dfab0aba1f4849abd9625392cb4af7139fd3842b09daef021f142b39cff71
 size 15984
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:43a4c7a3e3866946bfa6c6f5a1716563f36029ceb622e5804a6f396508c55ea3
 size 15984
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1b39933ef00f6fb7daa29790fb90e8ff14fa4ec97393154716671cac132e30db
 size 15984
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f0c37e29f749e6da69d82af0a38f01b394a754fce56fccbdf27902a5d585e43f
 size 15984
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:33c091df1ce77bff431f3379561fab3489657fa2cde7f17e48d31b6b28bf660c
 size 15984
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:83e3de474f59434cf28a8c41406b1e8bd00ec6293d50ff81f9c75db3e70e91e0
 size 15984
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6ca6a786fe6d9b25cb7742ed56fbbd3f68052ae10454dcdbf494e787b2682812
 size 15984
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b4ac31dcfba988823188c44099086577dd09e162577217892c3f4fd5ad8489c2
 size 1064
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 1.0984699882306788,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4466,6 +4466,447 @@
       "learning_rate": 5.445378758417925e-06,
       "loss": 1.063,
       "step": 637
+    },
+    {
+      "epoch": 1.001176932130247,
+      "grad_norm": 3.3213553428649902,
+      "learning_rate": 5.275390589031859e-06,
+      "loss": 1.7375,
+      "step": 638
+    },
+    {
+      "epoch": 1.0027461749705766,
+      "grad_norm": 1.4553793668746948,
+      "learning_rate": 5.108026758167719e-06,
+      "loss": 0.9496,
+      "step": 639
+    },
+    {
+      "epoch": 1.0043154178109062,
+      "grad_norm": 1.3991262912750244,
+      "learning_rate": 4.943291854189493e-06,
+      "loss": 0.9158,
+      "step": 640
+    },
+    {
+      "epoch": 1.0058846606512357,
+      "grad_norm": 1.5718517303466797,
+      "learning_rate": 4.781190393387796e-06,
+      "loss": 1.032,
+      "step": 641
+    },
+    {
+      "epoch": 1.0074539034915653,
+      "grad_norm": 1.546706199645996,
+      "learning_rate": 4.6217268198560404e-06,
+      "loss": 0.9635,
+      "step": 642
+    },
+    {
+      "epoch": 1.0090231463318948,
+      "grad_norm": 1.4677547216415405,
+      "learning_rate": 4.464905505368658e-06,
+      "loss": 0.9359,
+      "step": 643
+    },
+    {
+      "epoch": 1.0105923891722244,
+      "grad_norm": 1.4244987964630127,
+      "learning_rate": 4.3107307492612086e-06,
+      "loss": 0.8894,
+      "step": 644
+    },
+    {
+      "epoch": 1.012161632012554,
+      "grad_norm": 1.470013976097107,
+      "learning_rate": 4.1592067783125015e-06,
+      "loss": 1.0618,
+      "step": 645
+    },
+    {
+      "epoch": 1.0137308748528835,
+      "grad_norm": 1.5071467161178589,
+      "learning_rate": 4.010337746628751e-06,
+      "loss": 0.9016,
+      "step": 646
+    },
+    {
+      "epoch": 1.015300117693213,
+      "grad_norm": 1.406595230102539,
+      "learning_rate": 3.864127735529656e-06,
+      "loss": 0.9207,
+      "step": 647
+    },
+    {
+      "epoch": 1.0168693605335426,
+      "grad_norm": 1.355204701423645,
+      "learning_rate": 3.7205807534365315e-06,
+      "loss": 0.9483,
+      "step": 648
+    },
+    {
+      "epoch": 1.0184386033738722,
+      "grad_norm": 1.5814651250839233,
+      "learning_rate": 3.5797007357623945e-06,
+      "loss": 1.0056,
+      "step": 649
+    },
+    {
+      "epoch": 1.0200078462142017,
+      "grad_norm": 1.5514806509017944,
+      "learning_rate": 3.441491544804112e-06,
+      "loss": 0.9282,
+      "step": 650
+    },
+    {
+      "epoch": 1.0215770890545313,
+      "grad_norm": 1.3481324911117554,
+      "learning_rate": 3.3059569696364502e-06,
+      "loss": 0.853,
+      "step": 651
+    },
+    {
+      "epoch": 1.0231463318948608,
+      "grad_norm": 1.4449180364608765,
+      "learning_rate": 3.1731007260082616e-06,
+      "loss": 0.9905,
+      "step": 652
+    },
+    {
+      "epoch": 1.0247155747351904,
+      "grad_norm": 1.6003049612045288,
+      "learning_rate": 3.0429264562405776e-06,
+      "loss": 0.9784,
+      "step": 653
+    },
+    {
+      "epoch": 1.0262848175755197,
+      "grad_norm": 1.501465916633606,
+      "learning_rate": 2.9154377291267674e-06,
+      "loss": 1.0185,
+      "step": 654
+    },
+    {
+      "epoch": 1.0278540604158493,
+      "grad_norm": 1.373434066772461,
+      "learning_rate": 2.790638039834668e-06,
+      "loss": 0.9125,
+      "step": 655
+    },
+    {
+      "epoch": 1.0294233032561788,
+      "grad_norm": 1.6120808124542236,
+      "learning_rate": 2.6685308098108106e-06,
+      "loss": 1.0961,
+      "step": 656
+    },
+    {
+      "epoch": 1.0309925460965084,
+      "grad_norm": 1.455134630203247,
+      "learning_rate": 2.5491193866866025e-06,
+      "loss": 0.8963,
+      "step": 657
+    },
+    {
+      "epoch": 1.032561788936838,
+      "grad_norm": 1.4574368000030518,
+      "learning_rate": 2.432407044186509e-06,
+      "loss": 0.9296,
+      "step": 658
+    },
+    {
+      "epoch": 1.0341310317771675,
+      "grad_norm": 1.5060312747955322,
+      "learning_rate": 2.3183969820383735e-06,
+      "loss": 0.9674,
+      "step": 659
+    },
+    {
+      "epoch": 1.035700274617497,
+      "grad_norm": 1.544988751411438,
+      "learning_rate": 2.2070923258856255e-06,
+      "loss": 0.9882,
+      "step": 660
+    },
+    {
+      "epoch": 1.0372695174578266,
+      "grad_norm": 1.4960849285125732,
+      "learning_rate": 2.098496127201648e-06,
+      "loss": 0.966,
+      "step": 661
+    },
+    {
+      "epoch": 1.0388387602981561,
+      "grad_norm": 1.5136550664901733,
+      "learning_rate": 1.992611363206103e-06,
+      "loss": 0.9574,
+      "step": 662
+    },
+    {
+      "epoch": 1.0404080031384857,
+      "grad_norm": 1.4802120923995972,
+      "learning_rate": 1.889440936783242e-06,
+      "loss": 0.8649,
+      "step": 663
+    },
+    {
+      "epoch": 1.0419772459788152,
+      "grad_norm": 1.5333938598632812,
+      "learning_rate": 1.7889876764024505e-06,
+      "loss": 1.0243,
+      "step": 664
+    },
+    {
+      "epoch": 1.0435464888191448,
+      "grad_norm": 1.5968855619430542,
+      "learning_rate": 1.691254336040595e-06,
+      "loss": 0.9513,
+      "step": 665
+    },
+    {
+      "epoch": 1.0451157316594744,
+      "grad_norm": 1.512230396270752,
+      "learning_rate": 1.59624359510657e-06,
+      "loss": 1.037,
+      "step": 666
+    },
+    {
+      "epoch": 1.046684974499804,
+      "grad_norm": 1.5507651567459106,
+      "learning_rate": 1.5039580583678393e-06,
+      "loss": 0.9005,
+      "step": 667
+    },
+    {
+      "epoch": 1.0482542173401335,
+      "grad_norm": 1.6249401569366455,
+      "learning_rate": 1.414400255879008e-06,
+      "loss": 0.9521,
+      "step": 668
+    },
+    {
+      "epoch": 1.049823460180463,
+      "grad_norm": 1.5170681476593018,
+      "learning_rate": 1.327572642912468e-06,
+      "loss": 1.0033,
+      "step": 669
+    },
+    {
+      "epoch": 1.0513927030207926,
+      "grad_norm": 1.505729079246521,
+      "learning_rate": 1.2434775998910964e-06,
+      "loss": 0.9384,
+      "step": 670
+    },
+    {
+      "epoch": 1.052961945861122,
+      "grad_norm": 1.5404661893844604,
+      "learning_rate": 1.1621174323229612e-06,
+      "loss": 1.0742,
+      "step": 671
+    },
+    {
+      "epoch": 1.0545311887014515,
+      "grad_norm": 1.6279956102371216,
+      "learning_rate": 1.0834943707381784e-06,
+      "loss": 0.8588,
+      "step": 672
+    },
+    {
+      "epoch": 1.056100431541781,
+      "grad_norm": 1.5603866577148438,
+      "learning_rate": 1.0076105706276888e-06,
+      "loss": 1.0602,
+      "step": 673
+    },
+    {
+      "epoch": 1.0576696743821106,
+      "grad_norm": 1.5433942079544067,
+      "learning_rate": 9.344681123841967e-07,
+      "loss": 0.9865,
+      "step": 674
+    },
+    {
+      "epoch": 1.0592389172224401,
+      "grad_norm": 1.5365676879882812,
+      "learning_rate": 8.640690012451515e-07,
+      "loss": 0.967,
+      "step": 675
+    },
+    {
+      "epoch": 1.0608081600627697,
+      "grad_norm": 1.5364326238632202,
+      "learning_rate": 7.964151672377458e-07,
+      "loss": 0.9036,
+      "step": 676
+    },
+    {
+      "epoch": 1.0623774029030992,
+      "grad_norm": 1.4506745338439941,
+      "learning_rate": 7.315084651260009e-07,
+      "loss": 0.8917,
+      "step": 677
+    },
+    {
+      "epoch": 1.0639466457434288,
+      "grad_norm": 1.42936372756958,
+      "learning_rate": 6.69350674359959e-07,
+      "loss": 0.8587,
+      "step": 678
+    },
+    {
+      "epoch": 1.0655158885837583,
+      "grad_norm": 1.4997657537460327,
+      "learning_rate": 6.099434990268609e-07,
+      "loss": 1.0052,
+      "step": 679
+    },
+    {
+      "epoch": 1.067085131424088,
+      "grad_norm": 1.4839539527893066,
+      "learning_rate": 5.532885678043977e-07,
+      "loss": 0.8442,
+      "step": 680
+    },
+    {
+      "epoch": 1.0686543742644175,
+      "grad_norm": 1.5665620565414429,
+      "learning_rate": 4.9938743391615e-07,
+      "loss": 0.9402,
+      "step": 681
+    },
+    {
+      "epoch": 1.070223617104747,
+      "grad_norm": 1.7468892335891724,
+      "learning_rate": 4.482415750889204e-07,
+      "loss": 0.9801,
+      "step": 682
+    },
+    {
+      "epoch": 1.0717928599450766,
+      "grad_norm": 1.5470365285873413,
+      "learning_rate": 3.998523935122772e-07,
+      "loss": 0.9294,
+      "step": 683
+    },
+    {
+      "epoch": 1.0733621027854061,
+      "grad_norm": 1.544100046157837,
+      "learning_rate": 3.5422121580005864e-07,
+      "loss": 0.9683,
+      "step": 684
+    },
+    {
+      "epoch": 1.0749313456257357,
+      "grad_norm": 1.5746307373046875,
+      "learning_rate": 3.1134929295407564e-07,
+      "loss": 0.9611,
+      "step": 685
+    },
+    {
+      "epoch": 1.0765005884660652,
+      "grad_norm": 1.612066388130188,
+      "learning_rate": 2.7123780032973235e-07,
+      "loss": 0.9273,
+      "step": 686
+    },
+    {
+      "epoch": 1.0780698313063946,
+      "grad_norm": 1.5221633911132812,
+      "learning_rate": 2.3388783760386601e-07,
+      "loss": 0.9797,
+      "step": 687
+    },
+    {
+      "epoch": 1.079639074146724,
+      "grad_norm": 1.4008647203445435,
+      "learning_rate": 1.9930042874457254e-07,
+      "loss": 0.9276,
+      "step": 688
+    },
+    {
+      "epoch": 1.0812083169870537,
+      "grad_norm": 1.4528234004974365,
+      "learning_rate": 1.6747652198313957e-07,
+      "loss": 0.9828,
+      "step": 689
+    },
+    {
+      "epoch": 1.0827775598273832,
+      "grad_norm": 1.5776127576828003,
+      "learning_rate": 1.3841698978804285e-07,
+      "loss": 0.9532,
+      "step": 690
+    },
+    {
+      "epoch": 1.0843468026677128,
+      "grad_norm": 1.5421435832977295,
+      "learning_rate": 1.1212262884103974e-07,
+      "loss": 0.9135,
+      "step": 691
+    },
+    {
+      "epoch": 1.0859160455080423,
+      "grad_norm": 1.5392076969146729,
+      "learning_rate": 8.85941600153033e-08,
+      "loss": 0.9884,
+      "step": 692
+    },
+    {
+      "epoch": 1.0874852883483719,
+      "grad_norm": 1.4609785079956055,
+      "learning_rate": 6.783222835572055e-08,
+      "loss": 0.933,
+      "step": 693
+    },
+    {
+      "epoch": 1.0890545311887014,
+      "grad_norm": 1.4286479949951172,
+      "learning_rate": 4.98374030611084e-08,
+      "loss": 0.8715,
+      "step": 694
+    },
+    {
+      "epoch": 1.090623774029031,
+      "grad_norm": 1.6319226026535034,
+      "learning_rate": 3.461017746871675e-08,
+      "loss": 1.0219,
+      "step": 695
+    },
+    {
+      "epoch": 1.0921930168693605,
+      "grad_norm": 1.5931543111801147,
+      "learning_rate": 2.215096904060454e-08,
+      "loss": 0.9608,
+      "step": 696
+    },
+    {
+      "epoch": 1.09376225970969,
+      "grad_norm": 1.4178990125656128,
+      "learning_rate": 1.246011935228064e-08,
+      "loss": 0.8892,
+      "step": 697
+    },
+    {
+      "epoch": 1.0953315025500197,
+      "grad_norm": 1.5912450551986694,
+      "learning_rate": 5.537894083273543e-09,
+      "loss": 0.9645,
+      "step": 698
+    },
+    {
+      "epoch": 1.0969007453903492,
+      "grad_norm": 1.4889562129974365,
+      "learning_rate": 1.384483009898796e-09,
+      "loss": 0.87,
+      "step": 699
+    },
+    {
+      "epoch": 1.0984699882306788,
+      "grad_norm": 1.4964066743850708,
+      "learning_rate": 0.0,
+      "loss": 0.9337,
+      "step": 700
     }
   ],
   "logging_steps": 1,
@@ -4480,12 +4921,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.524580473166889e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
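For quick inspection of the updated state, the file can be read directly. A minimal sketch, assuming the checkpoint is available locally under last-checkpoint/; the per-step records added in the diff above live under Trainer's "log_history" key:

import json

# Load the checkpoint's trainer state and look at the final logged record.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["epoch"], state["global_step"])   # 1.0984699882306788 700
last = state["log_history"][-1]               # one record per logged step
print(last["step"], last["loss"], last["learning_rate"])  # 700 0.9337 0.0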