ben81828 committed (verified)
Commit 15021b1 · 1 Parent(s): f9e8b93

Training in progress, step 10750, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d437bc70c4c04c351d9b268de8b986e9c10960030d11d9b5cc0a07a032d4e75
+oid sha256:93a21c4fe29a9103dca860aa3bb15c0e288b74846b4a59c29b3fd8a0bf27740b
 size 29034840
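
Note: the adapter_model.safetensors entry above is a Git LFS pointer, so only its oid changed; the ~29 MB payload is the LoRA adapter alone, not a full model. A minimal sketch of attaching this checkpoint with PEFT, assuming last-checkpoint/ also contains the adapter_config.json that PEFT expects and that the Qwen2-VL base model named below is only illustrative (the exact base used for this run is not recorded in this diff):

# Hypothetical usage: attach the LoRA adapter saved in last-checkpoint/ to a base model.
from transformers import Qwen2VLForConditionalGeneration
from peft import PeftModel

base = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")  # assumed base model
model = PeftModel.from_pretrained(base, "last-checkpoint")  # reads adapter_model.safetensors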
last-checkpoint/global_step10750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43e2601f2d3c98983b4ee43fa2686c00b346705e83fc3ea0a86bc3417b5d420c
+size 43429616
last-checkpoint/global_step10750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31b687950ea88705f4a6f202a38ff6ac1e7f9cd3e488cd1dc631c5bc9f940da5
+size 43429616
last-checkpoint/global_step10750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7fa7bc5f002e6adaf92b93e1b55e50c26081ee1ce65671b97ac17ca5fca3226
+size 43429616
last-checkpoint/global_step10750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:271a22975994b10de58033da1bc3c5226bf30c17750f50f91d72fe94c629e664
+size 43429616
last-checkpoint/global_step10750/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9861c41f1725f2f024985cc75b883aee43d7c1e94a0df5bd1f8fbbc3450946c2
+size 637299
last-checkpoint/global_step10750/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8d3b758679140b5987ca75db93e8f9770288ea2b70a76fa1f38887c1e209697
+size 637171
last-checkpoint/global_step10750/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:098ea93049be2fbec3368ad9527cda183fab040b92eb46206600628c4d8e6a97
+size 637171
last-checkpoint/global_step10750/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9efc89e450a48f389ff9fa07b9ba567e69ecd990abac861b572e76644f9411e5
+size 637171
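
The global_step10750 files added above are DeepSpeed ZeRO partitions: one bf16 optimizer-state shard and one model-state shard per data-parallel rank (ranks 0-3 here). If a single consolidated state dict is needed outside DeepSpeed, one common route is DeepSpeed's zero_to_fp32 helper; a sketch, assuming a recent DeepSpeed release and that the checkpoint directory is last-checkpoint/:

# Sketch: merge the ZeRO-partitioned shards into a single fp32 state dict.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# The tag matches the directory name recorded in last-checkpoint/latest.
state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint", tag="global_step10750")
print(sum(v.numel() for v in state_dict.values()))  # total parameter count in the merged dict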
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step10650
+global_step10750
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:882705638474934670faaaf11f480a9ca965116088d43b545d3c16f14930bd88
+oid sha256:4c08e0df7f2c84b8164ab107eb8d3cd4a985cd82fe5a60ae9e364a68ec3639a3
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b49e541bf1b6ee313511bcb33f9c57c56d53d3ca5fc5060ada8ae009ad408c0
+oid sha256:55edbc753eb49d62368d11f179521fb9d5ae7a41bf3df439be5b58898588d862
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c575f73a7906ffee30747f33b3ff9d606e71678d1c70f907532bff0706c91b26
+oid sha256:389ea1a6d6727e263f6a278450853d396adad7f849ee20ba1051d7524e720482
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad85b3229606911aae4c7db32cce537b0b9493ac5a6f9cb5cde256955cbe0543
+oid sha256:5d23b87e78b4bd2aa5e9af52b52401821aafa218a5f4b2f71fc3a30e0dd77c67
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:284555ebd24f2f7c70a3cb7790c9d4c47d92d96814317f7156c53c32553482a5
+oid sha256:81f6af293e8bd78f46769a700265966fbd8048df89a7524d9c0b66db799d7a8e
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": 0.04092838987708092,
 "best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
- "epoch": 1.3047992404520534,
+ "epoch": 1.31705001378212,
 "eval_steps": 50,
- "global_step": 10650,
+ "global_step": 10750,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -18964,11 +18964,189 @@
 "eval_steps_per_second": 0.768,
 "num_input_tokens_seen": 71167464,
 "step": 10650
+ },
+ {
+ "epoch": 1.3054117791185569,
+ "grad_norm": 1.206825055856801,
+ "learning_rate": 2.950689735821428e-05,
+ "loss": 0.2511,
+ "num_input_tokens_seen": 71200776,
+ "step": 10655
+ },
+ {
+ "epoch": 1.3060243177850601,
+ "grad_norm": 0.8738688843235899,
+ "learning_rate": 2.9460709454307668e-05,
+ "loss": 0.2303,
+ "num_input_tokens_seen": 71234168,
+ "step": 10660
+ },
+ {
+ "epoch": 1.3066368564515636,
+ "grad_norm": 1.2670653369653868,
+ "learning_rate": 2.9414542625498032e-05,
+ "loss": 0.2423,
+ "num_input_tokens_seen": 71268008,
+ "step": 10665
+ },
+ {
+ "epoch": 1.3072493951180668,
+ "grad_norm": 1.0765877081535313,
+ "learning_rate": 2.9368396919156543e-05,
+ "loss": 0.2461,
+ "num_input_tokens_seen": 71301256,
+ "step": 10670
+ },
+ {
+ "epoch": 1.30786193378457,
+ "grad_norm": 1.0930641133259775,
+ "learning_rate": 2.932227238263271e-05,
+ "loss": 0.2585,
+ "num_input_tokens_seen": 71334304,
+ "step": 10675
+ },
+ {
+ "epoch": 1.3084744724510735,
+ "grad_norm": 1.5727229135478251,
+ "learning_rate": 2.927616906325432e-05,
+ "loss": 0.2558,
+ "num_input_tokens_seen": 71367784,
+ "step": 10680
+ },
+ {
+ "epoch": 1.3090870111175767,
+ "grad_norm": 1.1094592860069892,
+ "learning_rate": 2.9230087008327368e-05,
+ "loss": 0.2396,
+ "num_input_tokens_seen": 71400960,
+ "step": 10685
+ },
+ {
+ "epoch": 1.3096995497840802,
+ "grad_norm": 1.248724168955527,
+ "learning_rate": 2.9184026265136056e-05,
+ "loss": 0.2212,
+ "num_input_tokens_seen": 71433712,
+ "step": 10690
+ },
+ {
+ "epoch": 1.3103120884505834,
+ "grad_norm": 1.7911517740511143,
+ "learning_rate": 2.9137986880942715e-05,
+ "loss": 0.2074,
+ "num_input_tokens_seen": 71467256,
+ "step": 10695
+ },
+ {
+ "epoch": 1.3109246271170867,
+ "grad_norm": 1.2413107560000207,
+ "learning_rate": 2.909196890298773e-05,
+ "loss": 0.206,
+ "num_input_tokens_seen": 71501128,
+ "step": 10700
+ },
+ {
+ "epoch": 1.3109246271170867,
+ "eval_loss": 0.19016502797603607,
+ "eval_runtime": 19.7377,
+ "eval_samples_per_second": 3.04,
+ "eval_steps_per_second": 0.76,
+ "num_input_tokens_seen": 71501128,
+ "step": 10700
+ },
+ {
+ "epoch": 1.3115371657835901,
+ "grad_norm": 1.440592474356754,
+ "learning_rate": 2.9045972378489604e-05,
+ "loss": 0.2671,
+ "num_input_tokens_seen": 71534760,
+ "step": 10705
+ },
+ {
+ "epoch": 1.3121497044500934,
+ "grad_norm": 1.6043559122436624,
+ "learning_rate": 2.8999997354644698e-05,
+ "loss": 0.2431,
+ "num_input_tokens_seen": 71568000,
+ "step": 10710
+ },
+ {
+ "epoch": 1.3127622431165968,
+ "grad_norm": 1.6562341112618186,
+ "learning_rate": 2.8954043878627414e-05,
+ "loss": 0.2767,
+ "num_input_tokens_seen": 71601664,
+ "step": 10715
+ },
+ {
+ "epoch": 1.3133747817831,
+ "grad_norm": 1.3116654592131844,
+ "learning_rate": 2.890811199759003e-05,
+ "loss": 0.2077,
+ "num_input_tokens_seen": 71635832,
+ "step": 10720
+ },
+ {
+ "epoch": 1.3139873204496033,
+ "grad_norm": 13.474160151030459,
+ "learning_rate": 2.886220175866261e-05,
+ "loss": 0.2503,
+ "num_input_tokens_seen": 71669720,
+ "step": 10725
+ },
+ {
+ "epoch": 1.3145998591161068,
+ "grad_norm": 1.3031307301091186,
+ "learning_rate": 2.881631320895306e-05,
+ "loss": 0.2579,
+ "num_input_tokens_seen": 71703304,
+ "step": 10730
+ },
+ {
+ "epoch": 1.31521239778261,
+ "grad_norm": 1.193828203752651,
+ "learning_rate": 2.8770446395547036e-05,
+ "loss": 0.2089,
+ "num_input_tokens_seen": 71737192,
+ "step": 10735
+ },
+ {
+ "epoch": 1.3158249364491135,
+ "grad_norm": 1.0929056779676314,
+ "learning_rate": 2.87246013655079e-05,
+ "loss": 0.2317,
+ "num_input_tokens_seen": 71770632,
+ "step": 10740
+ },
+ {
+ "epoch": 1.3164374751156167,
+ "grad_norm": 1.0813012821730756,
+ "learning_rate": 2.8678778165876556e-05,
+ "loss": 0.2344,
+ "num_input_tokens_seen": 71804112,
+ "step": 10745
+ },
+ {
+ "epoch": 1.31705001378212,
+ "grad_norm": 1.228614756425054,
+ "learning_rate": 2.863297684367164e-05,
+ "loss": 0.184,
+ "num_input_tokens_seen": 71837520,
+ "step": 10750
+ },
+ {
+ "epoch": 1.31705001378212,
+ "eval_loss": 0.2411387860774994,
+ "eval_runtime": 19.9995,
+ "eval_samples_per_second": 3.0,
+ "eval_steps_per_second": 0.75,
+ "num_input_tokens_seen": 71837520,
+ "step": 10750
 }
 ],
 "logging_steps": 5,
 "max_steps": 16324,
- "num_input_tokens_seen": 71167464,
+ "num_input_tokens_seen": 71837520,
 "num_train_epochs": 2,
 "save_steps": 50,
 "stateful_callbacks": {
@@ -18983,7 +19161,7 @@
 "attributes": {}
 }
 },
- "total_flos": 4464001544224768.0,
+ "total_flos": 4505992934326272.0,
 "train_batch_size": 1,
 "trial_name": null,
 "trial_params": null