vaibhav1 committed (verified)
Commit 7087221 · 1 Parent(s): f4c106e

Training in progress, step 180, checkpoint

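The files touched below are the standard artifacts that a transformers Trainer writes for a periodic checkpoint (adapter weights, optimizer and scheduler state, RNG state, loss scaler, and trainer_state.json), pushed to the Hub under last-checkpoint/. A minimal sketch of a setup that produces commits like this one follows; it is not the author's actual script. The batch size and logging/eval cadence come from trainer_state.json, while save_steps=20 and the PEFT/LoRA adapter are assumptions inferred from the checkpoint interval and the 27 MB adapter_model.safetensors.

# Sketch only: a Trainer configuration that pushes the newest checkpoint to the
# Hub under last-checkpoint/ on every save (hub_strategy="checkpoint").
from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="outputs",            # local checkpoint directory (assumed name)
    per_device_train_batch_size=4,   # matches "train_batch_size": 4 in trainer_state.json
    logging_steps=20,                # matches "logging_steps": 20
    eval_strategy="steps",           # "evaluation_strategy" on older transformers releases
    eval_steps=20,                   # matches "eval_steps": 20
    save_steps=20,                   # assumption: checkpoints appear at steps 160, 180, ...
    push_to_hub=True,
    hub_strategy="checkpoint",       # keep the latest checkpoint under last-checkpoint/
)

# trainer = Trainer(model=model, args=args, train_dataset=..., eval_dataset=...)
# trainer.train()  # each save then triggers a commit like "Training in progress, step 180"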
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66012ccb2e672254b4feae075afca231c263ad6547e502796d4b21e7df42afdf
+oid sha256:38ee321f80ef10ff7f70c0b916088e62e63301e960bbb19329669742c451bdf3
 size 27280152
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a359c8fcf31918c8aa7a4cfb8dee8dbb4f0d51124b5e818a003d8f20fdbb21df
+oid sha256:f7558567ed3ae2948f27aa20eb53d1e2d55c3c362483785657b14d4c1df604c7
 size 54633978
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1109adbe12059c6c8cad0cd1f504ae1fe6537b864ec466879513aafd9c107a7c
+oid sha256:3ffa9c2c105bb48c868c99c24f95c12d4c685abd40387d72157912c716838a60
 size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb926ea15dfbbdfcbf10b4eeb7bd912f9e27d82ce5588f4af48957c623a6a487
+oid sha256:894d0e48bf1444f129e12325905662a936cdeeb9fec3a46a0155b3b08f997b67
 size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e0466309d6ddfb8ea316cec28e551aa3aa5b773e47de4fb79fdcd97127d3d71
+oid sha256:bc1255db14e553e59635768b23aee538de1fba4f2cb6eff8d03df681021ffb34
 size 1064
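Each of the binary files above is tracked with Git LFS, so the repository stores only a small pointer file (the version / oid sha256 / size lines shown in the diffs), and this commit simply swaps each oid while the sizes stay unchanged. A minimal sketch of verifying a downloaded blob against such a pointer; the paths in the example are illustrative, not taken from the repository.

# Sketch only: check a downloaded checkpoint blob against the Git LFS pointer
# shown in the diff (the "oid sha256:..." and "size ..." lines).
import hashlib
import os

def parse_lfs_pointer(pointer_path):
    """Parse a Git LFS pointer file into its oid and size fields."""
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return {
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

def verify_blob(blob_path, pointer_path):
    """Return True if the blob's sha256 digest and byte size match the pointer."""
    expected = parse_lfs_pointer(pointer_path)
    sha = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return (
        sha.hexdigest() == expected["oid"]
        and os.path.getsize(blob_path) == expected["size"]
    )

# Example with illustrative (hypothetical) paths:
# verify_blob("adapter_model.safetensors", "adapter_model.safetensors.pointer")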
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7600950118764845,
+  "epoch": 0.8551068883610451,
   "eval_steps": 20,
-  "global_step": 160,
+  "global_step": 180,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -144,6 +144,23 @@
       "eval_samples_per_second": 2.43,
       "eval_steps_per_second": 0.305,
       "step": 160
+    },
+    {
+      "epoch": 0.8551068883610451,
+      "grad_norm": 0.5900708436965942,
+      "learning_rate": 0.00018858031147626325,
+      "loss": 1.6863,
+      "step": 180
+    },
+    {
+      "epoch": 0.8551068883610451,
+      "eval_loss": 1.7521910667419434,
+      "eval_mean_token_accuracy": 0.5876746786401627,
+      "eval_num_tokens": 1133445.0,
+      "eval_runtime": 151.8321,
+      "eval_samples_per_second": 2.47,
+      "eval_steps_per_second": 0.31,
+      "step": 180
     }
   ],
   "logging_steps": 20,
@@ -163,7 +180,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.338847435948032e+16,
+  "total_flos": 4.864305054302208e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null