simon-mellergaard commited on
Commit
c09120d
·
verified ·
1 Parent(s): cbf94e4

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a22c8dadae3169476a58e86197c64b4855ed3edd99cb31a9fe32e1fdb79687a8
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8c088dd2439cacfe1e095ae943fafbed28ec1cabf8664b50579745419bf06e
3
  size 598898116
run-1/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:749188f365e911f11256fe2945c70c092d551fab90fc399eb8b9c37c89046ee0
3
  size 598898116
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8c088dd2439cacfe1e095ae943fafbed28ec1cabf8664b50579745419bf06e
3
  size 598898116
run-1/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a71d877b7f777422c9898fd486ca34976de9cf71a59fb99937afd46c8b426cce
3
  size 1197884026
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67a9bc1cb444595afd4387a247d83b5259d39585d2e460af5046ec910cdef4eb
3
  size 1197884026
run-1/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:306c6af96669ed13365a61a72a74a28f181e5e20ceb2078a041489ebbb847646
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d73e26459c92e195a7738eb8b675e4359a54bd79fe9532dd9d1f04483172c359
3
  size 1064
run-1/checkpoint-500/trainer_state.json CHANGED
@@ -11,50 +11,50 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.7729032258064517,
15
- "eval_loss": 3.520946979522705,
16
- "eval_runtime": 20.0431,
17
- "eval_samples_per_second": 154.667,
18
- "eval_steps_per_second": 1.646,
19
  "step": 159
20
  },
21
  {
22
  "epoch": 1.2578616352201257,
23
- "grad_norm": 6.797354698181152,
24
- "learning_rate": 1.5828092243186584e-05,
25
- "loss": 5.8294,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 2.0,
30
- "eval_accuracy": 0.9245161290322581,
31
- "eval_loss": 1.7692639827728271,
32
- "eval_runtime": 20.2508,
33
- "eval_samples_per_second": 153.081,
34
- "eval_steps_per_second": 1.63,
35
  "step": 318
36
  },
37
  {
38
  "epoch": 2.5157232704402515,
39
- "grad_norm": 4.905786514282227,
40
- "learning_rate": 1.1635220125786164e-05,
41
- "loss": 1.9917,
42
  "step": 400
43
  },
44
  {
45
  "epoch": 3.0,
46
- "eval_accuracy": 0.9438709677419355,
47
- "eval_loss": 1.2671879529953003,
48
- "eval_runtime": 20.1246,
49
- "eval_samples_per_second": 154.04,
50
- "eval_steps_per_second": 1.64,
51
  "step": 477
52
  }
53
  ],
54
  "logging_steps": 200,
55
- "max_steps": 954,
56
  "num_input_tokens_seen": 0,
57
- "num_train_epochs": 6,
58
  "save_steps": 500,
59
  "stateful_callbacks": {
60
  "TrainerControl": {
@@ -72,8 +72,8 @@
72
  "train_batch_size": 96,
73
  "trial_name": null,
74
  "trial_params": {
75
- "alpha": 0.4005807540566969,
76
- "num_train_epochs": 6,
77
- "temperature": 8
78
  }
79
  }
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.6248387096774194,
15
+ "eval_loss": 2.5749671459198,
16
+ "eval_runtime": 22.9474,
17
+ "eval_samples_per_second": 135.092,
18
+ "eval_steps_per_second": 1.438,
19
  "step": 159
20
  },
21
  {
22
  "epoch": 1.2578616352201257,
23
+ "grad_norm": 5.373847961425781,
24
+ "learning_rate": 1.49937106918239e-05,
25
+ "loss": 3.5751,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "eval_accuracy": 0.8938709677419355,
31
+ "eval_loss": 1.5459049940109253,
32
+ "eval_runtime": 22.8738,
33
+ "eval_samples_per_second": 135.526,
34
+ "eval_steps_per_second": 1.443,
35
  "step": 318
36
  },
37
  {
38
  "epoch": 2.5157232704402515,
39
+ "grad_norm": 3.5598983764648438,
40
+ "learning_rate": 9.962264150943397e-06,
41
+ "loss": 1.6838,
42
  "step": 400
43
  },
44
  {
45
  "epoch": 3.0,
46
+ "eval_accuracy": 0.9290322580645162,
47
+ "eval_loss": 1.163897156715393,
48
+ "eval_runtime": 22.8793,
49
+ "eval_samples_per_second": 135.494,
50
+ "eval_steps_per_second": 1.442,
51
  "step": 477
52
  }
53
  ],
54
  "logging_steps": 200,
55
+ "max_steps": 795,
56
  "num_input_tokens_seen": 0,
57
+ "num_train_epochs": 5,
58
  "save_steps": 500,
59
  "stateful_callbacks": {
60
  "TrainerControl": {
 
72
  "train_batch_size": 96,
73
  "trial_name": null,
74
  "trial_params": {
75
+ "alpha": 0.30165501089690516,
76
+ "num_train_epochs": 5,
77
+ "temperature": 19
78
  }
79
  }
run-1/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:166ac319de89d357910c885138ea139bcf526da11716bd9af418e8fabca59416
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f2a6043dcb359bf6e5c547e39efa179f167939f5b2be51fd4993780a524ca74
3
  size 5368
runs/Sep28_10-21-38_bb2384aee55a/events.out.tfevents.1759054927.bb2384aee55a.76.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57ec3628ce3320a9ec6e249667d33856d640f0486e9f42d916989fc94ca5f597
3
- size 29687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:917c5feb177169e9871fa7d66bc2801741e1567f4fe7880724d0d0841478b737
3
+ size 30364
runs/Sep28_10-21-38_bb2384aee55a/events.out.tfevents.1759056641.bb2384aee55a.76.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7adeae347ab7f5269da99be778fbbefeb0917578f24213cebcfb1ddd2f4e64fc
3
+ size 14501
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1b5ead62771fb4a133ea7812d8458dd760625e18862235c9bfe1d87fb1d371e
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f2a6043dcb359bf6e5c547e39efa179f167939f5b2be51fd4993780a524ca74
3
  size 5368