nttx commited on
Commit
6c85ecd
·
verified ·
1 Parent(s): 7f2eef8

Training in progress, step 1350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1adeacce160eee30a7b7f112deb15fd83a02d2b1897477759c0f1c02bf36d03d
3
  size 116744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cdbb1a57278412e92152a4949d49ba4732c41ab6121dfb06ee65f0af9eda337
3
  size 116744
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a006271130b19fe40e03a32f3c5eca6b0d66f4b61ad83cb905fa03ce3b082ec
3
  size 194232
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e1716ae3464eaef731275b1c94aab904a0491f865308d58ace787a8fbb5090a
3
  size 194232
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adf12ef8f6463911ee8aa1af296eae36d8607ed713341585f5542277591b95f4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d081faefb9e0a6fe5c1330abed4f18df22ca944dfb82a1f3198a99904e57c013
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:691717e600aaf0217103d9f677d016978cf61d1451c79796609a9daf6ab0e119
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6444f03632ac809ddd302ec167645d7a82c68acafc185363d92b0bcd166284dc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 9.77461051940918,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-1200",
4
- "epoch": 2.0833333333333335,
5
  "eval_steps": 150,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -919,6 +919,119 @@
919
  "eval_samples_per_second": 245.711,
920
  "eval_steps_per_second": 61.808,
921
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
922
  }
923
  ],
924
  "logging_steps": 10,
@@ -947,7 +1060,7 @@
947
  "attributes": {}
948
  }
949
  },
950
- "total_flos": 168069167185920.0,
951
  "train_batch_size": 4,
952
  "trial_name": null,
953
  "trial_params": null
 
1
  {
2
+ "best_metric": 9.774312973022461,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-1350",
4
+ "epoch": 2.34375,
5
  "eval_steps": 150,
6
+ "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
919
  "eval_samples_per_second": 245.711,
920
  "eval_steps_per_second": 61.808,
921
  "step": 1200
922
+ },
923
+ {
924
+ "epoch": 2.1006944444444446,
925
+ "grad_norm": 0.1574486941099167,
926
+ "learning_rate": 9.549150281252633e-06,
927
+ "loss": 9.7931,
928
+ "step": 1210
929
+ },
930
+ {
931
+ "epoch": 2.1180555555555554,
932
+ "grad_norm": 0.1029452309012413,
933
+ "learning_rate": 8.921941636953435e-06,
934
+ "loss": 9.7825,
935
+ "step": 1220
936
+ },
937
+ {
938
+ "epoch": 2.1354166666666665,
939
+ "grad_norm": 0.1019529327750206,
940
+ "learning_rate": 8.3140151960435e-06,
941
+ "loss": 9.7713,
942
+ "step": 1230
943
+ },
944
+ {
945
+ "epoch": 2.1527777777777777,
946
+ "grad_norm": 0.08560575544834137,
947
+ "learning_rate": 7.725656321603413e-06,
948
+ "loss": 9.7779,
949
+ "step": 1240
950
+ },
951
+ {
952
+ "epoch": 2.170138888888889,
953
+ "grad_norm": 0.1940574049949646,
954
+ "learning_rate": 7.157141191620548e-06,
955
+ "loss": 9.7698,
956
+ "step": 1250
957
+ },
958
+ {
959
+ "epoch": 2.1875,
960
+ "grad_norm": 0.1854049563407898,
961
+ "learning_rate": 6.6087366693499295e-06,
962
+ "loss": 9.7892,
963
+ "step": 1260
964
+ },
965
+ {
966
+ "epoch": 2.204861111111111,
967
+ "grad_norm": 0.08304405212402344,
968
+ "learning_rate": 6.080700178047688e-06,
969
+ "loss": 9.779,
970
+ "step": 1270
971
+ },
972
+ {
973
+ "epoch": 2.2222222222222223,
974
+ "grad_norm": 0.1659172624349594,
975
+ "learning_rate": 5.573279580135438e-06,
976
+ "loss": 9.7726,
977
+ "step": 1280
978
+ },
979
+ {
980
+ "epoch": 2.2395833333333335,
981
+ "grad_norm": 0.09768513590097427,
982
+ "learning_rate": 5.086713060852788e-06,
983
+ "loss": 9.7745,
984
+ "step": 1290
985
+ },
986
+ {
987
+ "epoch": 2.2569444444444446,
988
+ "grad_norm": 0.25313788652420044,
989
+ "learning_rate": 4.621229016452156e-06,
990
+ "loss": 9.7725,
991
+ "step": 1300
992
+ },
993
+ {
994
+ "epoch": 2.2743055555555554,
995
+ "grad_norm": 0.14335955679416656,
996
+ "learning_rate": 4.1770459469887005e-06,
997
+ "loss": 9.7953,
998
+ "step": 1310
999
+ },
1000
+ {
1001
+ "epoch": 2.2916666666666665,
1002
+ "grad_norm": 0.09536216408014297,
1003
+ "learning_rate": 3.7543723537555585e-06,
1004
+ "loss": 9.7852,
1005
+ "step": 1320
1006
+ },
1007
+ {
1008
+ "epoch": 2.3090277777777777,
1009
+ "grad_norm": 0.0851445123553276,
1010
+ "learning_rate": 3.35340664141246e-06,
1011
+ "loss": 9.7754,
1012
+ "step": 1330
1013
+ },
1014
+ {
1015
+ "epoch": 2.326388888888889,
1016
+ "grad_norm": 0.08119193464517593,
1017
+ "learning_rate": 2.9743370248538017e-06,
1018
+ "loss": 9.7722,
1019
+ "step": 1340
1020
+ },
1021
+ {
1022
+ "epoch": 2.34375,
1023
+ "grad_norm": 0.13422377407550812,
1024
+ "learning_rate": 2.6173414408598827e-06,
1025
+ "loss": 9.771,
1026
+ "step": 1350
1027
+ },
1028
+ {
1029
+ "epoch": 2.34375,
1030
+ "eval_loss": 9.774312973022461,
1031
+ "eval_runtime": 2.0132,
1032
+ "eval_samples_per_second": 240.915,
1033
+ "eval_steps_per_second": 60.601,
1034
+ "step": 1350
1035
  }
1036
  ],
1037
  "logging_steps": 10,
 
1060
  "attributes": {}
1061
  }
1062
  },
1063
+ "total_flos": 189290889216000.0,
1064
  "train_batch_size": 4,
1065
  "trial_name": null,
1066
  "trial_params": null