ben81828 commited on
Commit
c679bf2
·
verified ·
1 Parent(s): f41f87d

Training in progress, step 10000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:631a5c86aa988584fd6fd8d5a0cb756846cc35eb4e83266e3c3a1bd1efbe8675
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f3df1923d9fa4d4ba4210c00d4064b1217e6de0d9fae21da85801e619830098
3
  size 29034840
last-checkpoint/global_step10000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d240048f026803dbddd1724523a7ca2de2827f7f4046a28bec7612506c1f4896
3
+ size 43429616
last-checkpoint/global_step10000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af9d739eaf611dfc542e6970b3bce4c233d874be1a835af91983ff1b48e6133d
3
+ size 43429616
last-checkpoint/global_step10000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87b477d28fcd4876c45a421b65b72bef6e5547331943356dd7c2650f53908619
3
+ size 43429616
last-checkpoint/global_step10000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95d791eb0fa00dd6deff37b2ce257dec0682492c7016877770930a3b66f7357d
3
+ size 43429616
last-checkpoint/global_step10000/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f670f971061ee5ccc2ce8949c62d7f3b28e9592fb01ed74d60690c0b63dfeab
3
+ size 637299
last-checkpoint/global_step10000/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2394d91162704cf4261ad592249df26cc4f13718fa706ae188428fdb800ea63f
3
+ size 637171
last-checkpoint/global_step10000/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b35d849abccc721fe708faa4eb82c9af45fdf8e6aa0031f673275451eba8cc8
3
+ size 637171
last-checkpoint/global_step10000/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4de57588d8117b8dd97d287d5f926e84cda7a969757a2ba316ac4f8aff113557
3
+ size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step9950
 
1
+ global_step10000
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fab7c6843889748cea19edd36fe5fbfaff72ee3e41effe7fe89f29203c14111c
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08cce78177d96f46cb05953508580d717e9ab4cfa0af96f68f8fbf922cd3832f
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e43d2fa2c10094840ce71d7e9e55fbc58ee7f77c0b03b1db8fd6f075f9c0f27
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dd5e724cd1bc66d3308b441566c50035a5b4cfe9d731ef333cb45aff2efa0a8
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e2f22d92415d5cf3cbddae9ec0efda5499dc3d500ac143d48fc150eef2a1189
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db95c164a3f9f096591c54d374b04e63d0f313de87d240a7e902ed5ba07a81ca
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b645698b8b37cd361689496ae39c89be8d967d0363195d1218e27662fac1a63a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac2f9e5ecbefb231e5ddf28d7401794dd06330430a1d71c9e093f1b6b293b05
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f314e8881b07c179180285733449c92a86d3f742ada2b7d4c7cec319acb0388b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1e4f4d6952a4344fd27915c35f70bbe655fc6d5c21ec3a8fcbb8a3a75429e23
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.04092838987708092,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
4
- "epoch": 1.2190438271415882,
5
  "eval_steps": 50,
6
- "global_step": 9950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -17718,11 +17718,100 @@
17718
  "eval_steps_per_second": 0.735,
17719
  "num_input_tokens_seen": 66462256,
17720
  "step": 9950
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17721
  }
17722
  ],
17723
  "logging_steps": 5,
17724
  "max_steps": 16324,
17725
- "num_input_tokens_seen": 66462256,
17726
  "num_train_epochs": 2,
17727
  "save_steps": 50,
17728
  "stateful_callbacks": {
@@ -17737,7 +17826,7 @@
17737
  "attributes": {}
17738
  }
17739
  },
17740
- "total_flos": 4168859985248256.0,
17741
  "train_batch_size": 1,
17742
  "trial_name": null,
17743
  "trial_params": null
 
1
  {
2
  "best_metric": 0.04092838987708092,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
4
+ "epoch": 1.2251692138066215,
5
  "eval_steps": 50,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
17718
  "eval_steps_per_second": 0.735,
17719
  "num_input_tokens_seen": 66462256,
17720
  "step": 9950
17721
+ },
17722
+ {
17723
+ "epoch": 1.2196563658080917,
17724
+ "grad_norm": 1.4544809627964557,
17725
+ "learning_rate": 3.615874587797381e-05,
17726
+ "loss": 0.2162,
17727
+ "num_input_tokens_seen": 66495928,
17728
+ "step": 9955
17729
+ },
17730
+ {
17731
+ "epoch": 1.220268904474595,
17732
+ "grad_norm": 1.314166192503101,
17733
+ "learning_rate": 3.6110084317858005e-05,
17734
+ "loss": 0.2371,
17735
+ "num_input_tokens_seen": 66529384,
17736
+ "step": 9960
17737
+ },
17738
+ {
17739
+ "epoch": 1.2208814431410984,
17740
+ "grad_norm": 1.8272913355200002,
17741
+ "learning_rate": 3.606143701000271e-05,
17742
+ "loss": 0.2425,
17743
+ "num_input_tokens_seen": 66563312,
17744
+ "step": 9965
17745
+ },
17746
+ {
17747
+ "epoch": 1.2214939818076016,
17748
+ "grad_norm": 1.1190635076196744,
17749
+ "learning_rate": 3.6012804004324326e-05,
17750
+ "loss": 0.228,
17751
+ "num_input_tokens_seen": 66596840,
17752
+ "step": 9970
17753
+ },
17754
+ {
17755
+ "epoch": 1.2221065204741048,
17756
+ "grad_norm": 1.417082438708923,
17757
+ "learning_rate": 3.596418535072451e-05,
17758
+ "loss": 0.2407,
17759
+ "num_input_tokens_seen": 66630168,
17760
+ "step": 9975
17761
+ },
17762
+ {
17763
+ "epoch": 1.2227190591406083,
17764
+ "grad_norm": 1.5615601472935723,
17765
+ "learning_rate": 3.591558109909025e-05,
17766
+ "loss": 0.2049,
17767
+ "num_input_tokens_seen": 66663848,
17768
+ "step": 9980
17769
+ },
17770
+ {
17771
+ "epoch": 1.2233315978071115,
17772
+ "grad_norm": 1.061850813247257,
17773
+ "learning_rate": 3.586699129929373e-05,
17774
+ "loss": 0.1661,
17775
+ "num_input_tokens_seen": 66698912,
17776
+ "step": 9985
17777
+ },
17778
+ {
17779
+ "epoch": 1.223944136473615,
17780
+ "grad_norm": 1.0446245994987036,
17781
+ "learning_rate": 3.5818416001192276e-05,
17782
+ "loss": 0.2329,
17783
+ "num_input_tokens_seen": 66732112,
17784
+ "step": 9990
17785
+ },
17786
+ {
17787
+ "epoch": 1.2245566751401182,
17788
+ "grad_norm": 1.0453311293261525,
17789
+ "learning_rate": 3.576985525462841e-05,
17790
+ "loss": 0.2364,
17791
+ "num_input_tokens_seen": 66765832,
17792
+ "step": 9995
17793
+ },
17794
+ {
17795
+ "epoch": 1.2251692138066215,
17796
+ "grad_norm": 1.022044445150293,
17797
+ "learning_rate": 3.572130910942964e-05,
17798
+ "loss": 0.2736,
17799
+ "num_input_tokens_seen": 66798760,
17800
+ "step": 10000
17801
+ },
17802
+ {
17803
+ "epoch": 1.2251692138066215,
17804
+ "eval_loss": 0.15618817508220673,
17805
+ "eval_runtime": 20.4016,
17806
+ "eval_samples_per_second": 2.941,
17807
+ "eval_steps_per_second": 0.735,
17808
+ "num_input_tokens_seen": 66798760,
17809
+ "step": 10000
17810
  }
17811
  ],
17812
  "logging_steps": 5,
17813
  "max_steps": 16324,
17814
+ "num_input_tokens_seen": 66798760,
17815
  "num_train_epochs": 2,
17816
  "save_steps": 50,
17817
  "stateful_callbacks": {
 
17826
  "attributes": {}
17827
  }
17828
  },
17829
+ "total_flos": 4189946547535872.0,
17830
  "train_batch_size": 1,
17831
  "trial_name": null,
17832
  "trial_params": null