iamnguyen committed on
Commit
f1e3b33
·
verified ·
1 Parent(s): 43a65d9

Training in progress, step 2204, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -21,12 +21,12 @@
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
- "q_proj",
 
25
  "up_proj",
26
  "o_proj",
27
- "down_proj",
28
- "gate_proj",
29
- "v_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
21
  "revision": null,
22
  "target_modules": [
23
  "k_proj",
24
+ "down_proj",
25
+ "v_proj",
26
  "up_proj",
27
  "o_proj",
28
+ "q_proj",
29
+ "gate_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4ebfd33b7abf52b0221e9e6438f57ad9ade398d642264074d5d78d52f2f5585
3
  size 147770496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cd256ed2cf42f5e6bc1d967561fc9127cf3d00a74d8f5a7e02bc912e9049cbd
3
  size 147770496
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9619aab8a0a055b531f39ab557331b389a9fa2d2149d494c38f79b5da0e3fcf4
3
  size 75455810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04f754a5c68f69f9b9c234616ef4b49186fe7b13f01c59b58c7a934af44d58eb
3
  size 75455810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ac4b4577a5f40e218c95809a4ca1e01ef0e14557bfe8b9b2264ff12642a30a5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5bf72d5bfc771492b725bac038dec0ee052dc91b53de82cb1ff14643a5998ad
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aceee179503688e049bca3e607cee6f931f429ae5aad6975d08daff2bbd396c8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6206caf8c53a2313ff866514fbfe50f942337f5cabab9c55ae18e70725c9d9e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8524434340514221,
5
  "eval_steps": 500,
6
- "global_step": 2200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -15407,6 +15407,34 @@
15407
  "learning_rate": 5.472263101036212e-07,
15408
  "loss": 1.4325,
15409
  "step": 2200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15410
  }
15411
  ],
15412
  "logging_steps": 1.0,
@@ -15426,7 +15454,7 @@
15426
  "attributes": {}
15427
  }
15428
  },
15429
- "total_flos": 2.1761117551250289e+18,
15430
  "train_batch_size": 1,
15431
  "trial_name": null,
15432
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8539933312042428,
5
  "eval_steps": 500,
6
+ "global_step": 2204,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
15407
  "learning_rate": 5.472263101036212e-07,
15408
  "loss": 1.4325,
15409
  "step": 2200
15410
+ },
15411
+ {
15412
+ "epoch": 0.8528309083396273,
15413
+ "grad_norm": 0.17984230816364288,
15414
+ "learning_rate": 5.444033342223832e-07,
15415
+ "loss": 1.4107,
15416
+ "step": 2201
15417
+ },
15418
+ {
15419
+ "epoch": 0.8532183826278325,
15420
+ "grad_norm": 0.1888173371553421,
15421
+ "learning_rate": 5.41587239338971e-07,
15422
+ "loss": 1.3489,
15423
+ "step": 2202
15424
+ },
15425
+ {
15426
+ "epoch": 0.8536058569160376,
15427
+ "grad_norm": 0.19261683523654938,
15428
+ "learning_rate": 5.38778029802417e-07,
15429
+ "loss": 1.3745,
15430
+ "step": 2203
15431
+ },
15432
+ {
15433
+ "epoch": 0.8539933312042428,
15434
+ "grad_norm": 0.19721238315105438,
15435
+ "learning_rate": 5.359757099511237e-07,
15436
+ "loss": 1.3864,
15437
+ "step": 2204
15438
  }
15439
  ],
15440
  "logging_steps": 1.0,
 
15454
  "attributes": {}
15455
  }
15456
  },
15457
+ "total_flos": 2.1800206142446787e+18,
15458
  "train_batch_size": 1,
15459
  "trial_name": null,
15460
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c4386e79026127651aadb56025eb1882c7820cff5922fb5a0636ad098d66e40
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16fb774fc7374c972ea93aaa72bc1893a3284121b69ae795a08735c78b3f7dc5
3
  size 5560