iamnguyen committed on
Commit
380cb71
·
verified ·
1 Parent(s): 63b9fea

Training in progress, step 116, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "gate_proj",
24
- "q_proj",
25
- "o_proj",
26
  "down_proj",
 
 
27
  "up_proj",
28
- "v_proj",
29
- "k_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
23
  "down_proj",
24
+ "q_proj",
25
+ "gate_proj",
26
  "up_proj",
27
+ "k_proj",
28
+ "o_proj",
29
+ "v_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a382456c446f34b195fbfd702a439be3a1e58d2a36d956d357f01865a9f79d8
3
  size 147770496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6afb5fe8354a6bf44657911c90884a5f22d13610a6fdc04cedfb7adbdf5a6136
3
  size 147770496
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f98750a3e57cc2fc9efbb170ea0c2d4e6e07105ecd32dc2e6b33fd619403c2a
3
- size 74440308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d7691b06ed9b50fbc0611aee62af3da692001feabdc448d28cef6fec0dd397f
3
+ size 75455362
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de022fc7aa1925c72ae7474cead510ec1b0250a1879a157882455c4937721e6f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b2d3bdbefea3cedf8425e160a4a6e6072c7d80ec0aea4a71c41506faa0303a4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db8ff398e296fc80d27c81c792fc561b8565bd13d3296779cf24d36d13c2df0f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:024d9e78a1ac8400cbdd370a836aa056131227ba28580cf644bcc2ef5b956783
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.04339712027898149,
5
  "eval_steps": 500,
6
- "global_step": 112,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -791,6 +791,34 @@
791
  "learning_rate": 9.986107292389465e-06,
792
  "loss": 1.7713,
793
  "step": 112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
794
  }
795
  ],
796
  "logging_steps": 1.0,
@@ -810,7 +838,7 @@
810
  "attributes": {}
811
  }
812
  },
813
- "total_flos": 1.1051198111020954e+17,
814
  "train_batch_size": 1,
815
  "trial_name": null,
816
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04494701743180225,
5
  "eval_steps": 500,
6
+ "global_step": 116,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
791
  "learning_rate": 9.986107292389465e-06,
792
  "loss": 1.7713,
793
  "step": 112
794
+ },
795
+ {
796
+ "epoch": 0.04378459456718668,
797
+ "grad_norm": 0.10954278707504272,
798
+ "learning_rate": 9.985640566680987e-06,
799
+ "loss": 1.7273,
800
+ "step": 113
801
+ },
802
+ {
803
+ "epoch": 0.04417206885539187,
804
+ "grad_norm": 0.13270458579063416,
805
+ "learning_rate": 9.98516614140173e-06,
806
+ "loss": 1.7355,
807
+ "step": 114
808
+ },
809
+ {
810
+ "epoch": 0.044559543143597065,
811
+ "grad_norm": 0.12262709438800812,
812
+ "learning_rate": 9.984684017284372e-06,
813
+ "loss": 1.6893,
814
+ "step": 115
815
+ },
816
+ {
817
+ "epoch": 0.04494701743180225,
818
+ "grad_norm": 0.11373162269592285,
819
+ "learning_rate": 9.98419419507348e-06,
820
+ "loss": 1.7803,
821
+ "step": 116
822
  }
823
  ],
824
  "logging_steps": 1.0,
 
838
  "attributes": {}
839
  }
840
  },
841
+ "total_flos": 1.1443990140785664e+17,
842
  "train_batch_size": 1,
843
  "trial_name": null,
844
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fdbe8ab53e2bdf789bc2d047a904abd00bae7cba146ca495869a765cce314de
3
- size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4076fd30d59e6db9c1314dd4fbfa0803720cb806be59b54c3307e3dfdb52b74a
3
+ size 5560