samil24 commited on
Commit
9bd9d72
·
verified ·
1 Parent(s): 5a51e13

Training in progress, epoch 1

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. model.safetensors +1 -1
  2. run-0/checkpoint-117/model.safetensors +1 -1
  3. run-0/checkpoint-117/optimizer.pt +1 -1
  4. run-0/checkpoint-117/scheduler.pt +1 -1
  5. run-0/checkpoint-117/trainer_state.json +23 -23
  6. run-0/checkpoint-117/training_args.bin +1 -1
  7. run-0/checkpoint-39/model.safetensors +1 -1
  8. run-0/checkpoint-39/optimizer.pt +1 -1
  9. run-0/checkpoint-39/scheduler.pt +1 -1
  10. run-0/checkpoint-39/trainer_state.json +10 -10
  11. run-0/checkpoint-39/training_args.bin +1 -1
  12. run-0/checkpoint-78/model.safetensors +1 -1
  13. run-0/checkpoint-78/optimizer.pt +1 -1
  14. run-0/checkpoint-78/scheduler.pt +1 -1
  15. run-0/checkpoint-78/trainer_state.json +15 -15
  16. run-0/checkpoint-78/training_args.bin +1 -1
  17. run-1/checkpoint-117/trainer_state.json +19 -19
  18. run-1/checkpoint-156/trainer_state.json +24 -24
  19. run-1/checkpoint-195/trainer_state.json +29 -29
  20. run-1/checkpoint-39/model.safetensors +1 -1
  21. run-1/checkpoint-39/optimizer.pt +1 -1
  22. run-1/checkpoint-39/scheduler.pt +1 -1
  23. run-1/checkpoint-39/trainer_state.json +7 -7
  24. run-1/checkpoint-39/training_args.bin +1 -1
  25. run-1/checkpoint-78/model.safetensors +1 -1
  26. run-1/checkpoint-78/optimizer.pt +1 -1
  27. run-1/checkpoint-78/scheduler.pt +1 -1
  28. run-1/checkpoint-78/trainer_state.json +14 -14
  29. run-1/checkpoint-78/training_args.bin +1 -1
  30. run-9/checkpoint-117/config.json +26 -0
  31. run-9/checkpoint-117/model.safetensors +3 -0
  32. run-9/checkpoint-117/optimizer.pt +3 -0
  33. run-9/checkpoint-117/rng_state.pth +3 -0
  34. run-9/checkpoint-117/scheduler.pt +3 -0
  35. run-9/checkpoint-117/special_tokens_map.json +7 -0
  36. run-9/checkpoint-117/tokenizer.json +0 -0
  37. run-9/checkpoint-117/tokenizer_config.json +56 -0
  38. run-9/checkpoint-117/trainer_state.json +67 -0
  39. run-9/checkpoint-117/training_args.bin +3 -0
  40. run-9/checkpoint-117/vocab.txt +0 -0
  41. run-9/checkpoint-156/config.json +1 -1
  42. run-9/checkpoint-156/model.safetensors +1 -1
  43. run-9/checkpoint-156/optimizer.pt +1 -1
  44. run-9/checkpoint-156/rng_state.pth +1 -1
  45. run-9/checkpoint-156/scheduler.pt +1 -1
  46. run-9/checkpoint-156/trainer_state.json +41 -21
  47. run-9/checkpoint-156/training_args.bin +1 -1
  48. run-9/checkpoint-195/config.json +26 -0
  49. run-9/checkpoint-195/model.safetensors +3 -0
  50. run-9/checkpoint-195/optimizer.pt +3 -0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:465fd61ef3175cbbce2909749fc7ae5734f210f378de19d57868f31c4380a23a
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc59468e2ba140a6f6e62e3c90e1035f190056bdc9b499e74ecade2b7fa551e7
3
  size 437958648
run-0/checkpoint-117/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62adfeef9129605396dfe21c65c5fc3d59bfeefce7d8c03cb8581764a9f7944a
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ce4125b396a0606ad963636d3d8c5e44f951bd4740645669474eae1f3cb814b
3
  size 437958648
run-0/checkpoint-117/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a45c1fcbf9a167a2dd3a862eee5e760ea3450db12550e273e1c3030c7c92da3
3
  size 876038394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b5187a960f1de00c4f147eb0f3746cc85cc2d1b7a30c131b1c886923b315421
3
  size 876038394
run-0/checkpoint-117/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e9dbb9cc4ed759cb224abe50df0e547d0121b53700136189150fe57d392f12b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09557b1d4da433a4489d12bec551f7b75466f796a905598e6ba8698b633264c8
3
  size 1064
run-0/checkpoint-117/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 39,
3
- "best_metric": 0.5270758122743683,
4
- "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-0/checkpoint-39",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 117,
@@ -11,36 +11,36 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.5270758122743683,
15
- "eval_loss": 0.6918498277664185,
16
- "eval_runtime": 0.635,
17
- "eval_samples_per_second": 436.194,
18
- "eval_steps_per_second": 14.172,
19
  "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.4729241877256318,
24
- "eval_loss": 6.468245983123779,
25
- "eval_runtime": 0.6398,
26
- "eval_samples_per_second": 432.935,
27
- "eval_steps_per_second": 14.066,
28
  "step": 78
29
  },
30
  {
31
  "epoch": 3.0,
32
- "eval_accuracy": 0.4729241877256318,
33
- "eval_loss": 0.7129499912261963,
34
- "eval_runtime": 0.6364,
35
- "eval_samples_per_second": 435.269,
36
- "eval_steps_per_second": 14.142,
37
  "step": 117
38
  }
39
  ],
40
  "logging_steps": 500,
41
- "max_steps": 195,
42
  "num_input_tokens_seen": 0,
43
- "num_train_epochs": 5,
44
  "save_steps": 500,
45
  "stateful_callbacks": {
46
  "TrainerControl": {
@@ -49,7 +49,7 @@
49
  "should_evaluate": false,
50
  "should_log": false,
51
  "should_save": true,
52
- "should_training_stop": false
53
  },
54
  "attributes": {}
55
  }
@@ -59,9 +59,9 @@
59
  "trial_name": null,
60
  "trial_params": {
61
  "dropout_rate": 0.0134,
62
- "learning_rate": 0.05,
63
  "max_length": 32,
64
- "num_train_epochs": 5,
65
  "per_device_train_batch_size": 64
66
  }
67
  }
 
1
  {
2
+ "best_global_step": 117,
3
+ "best_metric": 0.6823104693140795,
4
+ "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-0/checkpoint-117",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 117,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.6787003610108303,
15
+ "eval_loss": 0.6131929755210876,
16
+ "eval_runtime": 0.6361,
17
+ "eval_samples_per_second": 435.464,
18
+ "eval_steps_per_second": 14.149,
19
  "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.6750902527075813,
24
+ "eval_loss": 0.7132726907730103,
25
+ "eval_runtime": 0.6373,
26
+ "eval_samples_per_second": 434.619,
27
+ "eval_steps_per_second": 14.121,
28
  "step": 78
29
  },
30
  {
31
  "epoch": 3.0,
32
+ "eval_accuracy": 0.6823104693140795,
33
+ "eval_loss": 0.7888869047164917,
34
+ "eval_runtime": 0.6455,
35
+ "eval_samples_per_second": 429.153,
36
+ "eval_steps_per_second": 13.944,
37
  "step": 117
38
  }
39
  ],
40
  "logging_steps": 500,
41
+ "max_steps": 117,
42
  "num_input_tokens_seen": 0,
43
+ "num_train_epochs": 3,
44
  "save_steps": 500,
45
  "stateful_callbacks": {
46
  "TrainerControl": {
 
49
  "should_evaluate": false,
50
  "should_log": false,
51
  "should_save": true,
52
+ "should_training_stop": true
53
  },
54
  "attributes": {}
55
  }
 
59
  "trial_name": null,
60
  "trial_params": {
61
  "dropout_rate": 0.0134,
62
+ "learning_rate": 0.0001,
63
  "max_length": 32,
64
+ "num_train_epochs": 3,
65
  "per_device_train_batch_size": 64
66
  }
67
  }
run-0/checkpoint-117/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e99cfd8ed47c6deda5dd532724592f1d8997b2921756d80eddbdaca287b3e257
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e666bae6191103ce3111bb8ce1c7c6747611fec7304e19a2fcda4daf98790d92
3
  size 5432
run-0/checkpoint-39/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:501c9d0db52de0ae3a0489f8234dd2aff7ece8059a7ea3877035dd4ced9cc94b
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2716fb232c32aa7c1891f7b4ce0a5863b69dd1aae61622661543846c2b3eebea
3
  size 437958648
run-0/checkpoint-39/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e43040edbcdbc937fbc2d1feb504b407b10f8c41ec3be86388a416d702f046bd
3
  size 876038394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:854cec4a44575fbd2cfa015e5ad662a02293b134682b7dc58d4cced3c269df12
3
  size 876038394
run-0/checkpoint-39/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:598448aee663bd69feb15b730dcfefd83bf321a2bd53ece879436b0e2e85c889
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0a7640caff23c1bb50b123dc402ae0eaf9bfbc3d4e319dc17d8121e9001bc26
3
  size 1064
run-0/checkpoint-39/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 39,
3
- "best_metric": 0.5270758122743683,
4
  "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-0/checkpoint-39",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
@@ -11,18 +11,18 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.5270758122743683,
15
- "eval_loss": 0.6918498277664185,
16
- "eval_runtime": 0.635,
17
- "eval_samples_per_second": 436.194,
18
- "eval_steps_per_second": 14.172,
19
  "step": 39
20
  }
21
  ],
22
  "logging_steps": 500,
23
- "max_steps": 195,
24
  "num_input_tokens_seen": 0,
25
- "num_train_epochs": 5,
26
  "save_steps": 500,
27
  "stateful_callbacks": {
28
  "TrainerControl": {
@@ -41,9 +41,9 @@
41
  "trial_name": null,
42
  "trial_params": {
43
  "dropout_rate": 0.0134,
44
- "learning_rate": 0.05,
45
  "max_length": 32,
46
- "num_train_epochs": 5,
47
  "per_device_train_batch_size": 64
48
  }
49
  }
 
1
  {
2
  "best_global_step": 39,
3
+ "best_metric": 0.6787003610108303,
4
  "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-0/checkpoint-39",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.6787003610108303,
15
+ "eval_loss": 0.6131929755210876,
16
+ "eval_runtime": 0.6361,
17
+ "eval_samples_per_second": 435.464,
18
+ "eval_steps_per_second": 14.149,
19
  "step": 39
20
  }
21
  ],
22
  "logging_steps": 500,
23
+ "max_steps": 117,
24
  "num_input_tokens_seen": 0,
25
+ "num_train_epochs": 3,
26
  "save_steps": 500,
27
  "stateful_callbacks": {
28
  "TrainerControl": {
 
41
  "trial_name": null,
42
  "trial_params": {
43
  "dropout_rate": 0.0134,
44
+ "learning_rate": 0.0001,
45
  "max_length": 32,
46
+ "num_train_epochs": 3,
47
  "per_device_train_batch_size": 64
48
  }
49
  }
run-0/checkpoint-39/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e99cfd8ed47c6deda5dd532724592f1d8997b2921756d80eddbdaca287b3e257
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e666bae6191103ce3111bb8ce1c7c6747611fec7304e19a2fcda4daf98790d92
3
  size 5432
run-0/checkpoint-78/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8f0c9c3d76c0bfba182562429ed0c930aab0b272c6f424390f7e6b8a2dfa20e
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b2b58ba8316c1e14342d28ec70cb085c885595457fd6595ed055803edb65417
3
  size 437958648
run-0/checkpoint-78/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d8abd6ea5465e4239d42abf177c06d8c0df6edd3ead63dfb120fabebb279a91
3
  size 876038394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7001a693620ee2b9cca6597b55aa32607001b471c898c3b6b0c903fd50aa01b1
3
  size 876038394
run-0/checkpoint-78/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:562900fbc54f61683673f7d45f332e2421789e473397b69e96adb452a1719746
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20c6c37a4a15bf2c6e5ac49dadc2a47206fbd55bea0d19dc3dfe3b1f35cc3fb0
3
  size 1064
run-0/checkpoint-78/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 39,
3
- "best_metric": 0.5270758122743683,
4
  "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-0/checkpoint-39",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
@@ -11,27 +11,27 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.5270758122743683,
15
- "eval_loss": 0.6918498277664185,
16
- "eval_runtime": 0.635,
17
- "eval_samples_per_second": 436.194,
18
- "eval_steps_per_second": 14.172,
19
  "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.4729241877256318,
24
- "eval_loss": 6.468245983123779,
25
- "eval_runtime": 0.6398,
26
- "eval_samples_per_second": 432.935,
27
- "eval_steps_per_second": 14.066,
28
  "step": 78
29
  }
30
  ],
31
  "logging_steps": 500,
32
- "max_steps": 195,
33
  "num_input_tokens_seen": 0,
34
- "num_train_epochs": 5,
35
  "save_steps": 500,
36
  "stateful_callbacks": {
37
  "TrainerControl": {
@@ -50,9 +50,9 @@
50
  "trial_name": null,
51
  "trial_params": {
52
  "dropout_rate": 0.0134,
53
- "learning_rate": 0.05,
54
  "max_length": 32,
55
- "num_train_epochs": 5,
56
  "per_device_train_batch_size": 64
57
  }
58
  }
 
1
  {
2
  "best_global_step": 39,
3
+ "best_metric": 0.6787003610108303,
4
  "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-0/checkpoint-39",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.6787003610108303,
15
+ "eval_loss": 0.6131929755210876,
16
+ "eval_runtime": 0.6361,
17
+ "eval_samples_per_second": 435.464,
18
+ "eval_steps_per_second": 14.149,
19
  "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.6750902527075813,
24
+ "eval_loss": 0.7132726907730103,
25
+ "eval_runtime": 0.6373,
26
+ "eval_samples_per_second": 434.619,
27
+ "eval_steps_per_second": 14.121,
28
  "step": 78
29
  }
30
  ],
31
  "logging_steps": 500,
32
+ "max_steps": 117,
33
  "num_input_tokens_seen": 0,
34
+ "num_train_epochs": 3,
35
  "save_steps": 500,
36
  "stateful_callbacks": {
37
  "TrainerControl": {
 
50
  "trial_name": null,
51
  "trial_params": {
52
  "dropout_rate": 0.0134,
53
+ "learning_rate": 0.0001,
54
  "max_length": 32,
55
+ "num_train_epochs": 3,
56
  "per_device_train_batch_size": 64
57
  }
58
  }
run-0/checkpoint-78/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e99cfd8ed47c6deda5dd532724592f1d8997b2921756d80eddbdaca287b3e257
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e666bae6191103ce3111bb8ce1c7c6747611fec7304e19a2fcda4daf98790d92
3
  size 5432
run-1/checkpoint-117/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 39,
3
- "best_metric": 0.5270758122743683,
4
- "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-39",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 117,
@@ -11,29 +11,29 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.5270758122743683,
15
- "eval_loss": 0.6918498277664185,
16
- "eval_runtime": 0.6348,
17
- "eval_samples_per_second": 436.374,
18
- "eval_steps_per_second": 14.178,
19
  "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.4729241877256318,
24
- "eval_loss": 6.468245983123779,
25
- "eval_runtime": 0.638,
26
- "eval_samples_per_second": 434.168,
27
- "eval_steps_per_second": 14.107,
28
  "step": 78
29
  },
30
  {
31
  "epoch": 3.0,
32
- "eval_accuracy": 0.4729241877256318,
33
- "eval_loss": 0.7129499912261963,
34
- "eval_runtime": 0.637,
35
- "eval_samples_per_second": 434.85,
36
- "eval_steps_per_second": 14.129,
37
  "step": 117
38
  }
39
  ],
@@ -59,7 +59,7 @@
59
  "trial_name": null,
60
  "trial_params": {
61
  "dropout_rate": 0.0134,
62
- "learning_rate": 0.05,
63
  "max_length": 32,
64
  "num_train_epochs": 5,
65
  "per_device_train_batch_size": 64
 
1
  {
2
+ "best_global_step": 117,
3
+ "best_metric": 0.6895306859205776,
4
+ "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-117",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
  "global_step": 117,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.6462093862815884,
15
+ "eval_loss": 0.6229268908500671,
16
+ "eval_runtime": 0.6366,
17
+ "eval_samples_per_second": 435.104,
18
+ "eval_steps_per_second": 14.137,
19
  "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.6570397111913358,
24
+ "eval_loss": 0.7122572064399719,
25
+ "eval_runtime": 0.6369,
26
+ "eval_samples_per_second": 434.899,
27
+ "eval_steps_per_second": 14.13,
28
  "step": 78
29
  },
30
  {
31
  "epoch": 3.0,
32
+ "eval_accuracy": 0.6895306859205776,
33
+ "eval_loss": 0.6926298141479492,
34
+ "eval_runtime": 0.638,
35
+ "eval_samples_per_second": 434.136,
36
+ "eval_steps_per_second": 14.106,
37
  "step": 117
38
  }
39
  ],
 
59
  "trial_name": null,
60
  "trial_params": {
61
  "dropout_rate": 0.0134,
62
+ "learning_rate": 0.0001,
63
  "max_length": 32,
64
  "num_train_epochs": 5,
65
  "per_device_train_batch_size": 64
run-1/checkpoint-156/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 39,
3
- "best_metric": 0.5270758122743683,
4
- "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-39",
5
  "epoch": 4.0,
6
  "eval_steps": 500,
7
  "global_step": 156,
@@ -11,38 +11,38 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.5270758122743683,
15
- "eval_loss": 0.6918498277664185,
16
- "eval_runtime": 0.6348,
17
- "eval_samples_per_second": 436.374,
18
- "eval_steps_per_second": 14.178,
19
  "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.4729241877256318,
24
- "eval_loss": 6.468245983123779,
25
- "eval_runtime": 0.638,
26
- "eval_samples_per_second": 434.168,
27
- "eval_steps_per_second": 14.107,
28
  "step": 78
29
  },
30
  {
31
  "epoch": 3.0,
32
- "eval_accuracy": 0.4729241877256318,
33
- "eval_loss": 0.7129499912261963,
34
- "eval_runtime": 0.637,
35
- "eval_samples_per_second": 434.85,
36
- "eval_steps_per_second": 14.129,
37
  "step": 117
38
  },
39
  {
40
  "epoch": 4.0,
41
- "eval_accuracy": 0.4729241877256318,
42
- "eval_loss": 0.9733805060386658,
43
- "eval_runtime": 0.6391,
44
- "eval_samples_per_second": 433.43,
45
- "eval_steps_per_second": 14.083,
46
  "step": 156
47
  }
48
  ],
@@ -68,7 +68,7 @@
68
  "trial_name": null,
69
  "trial_params": {
70
  "dropout_rate": 0.0134,
71
- "learning_rate": 0.05,
72
  "max_length": 32,
73
  "num_train_epochs": 5,
74
  "per_device_train_batch_size": 64
 
1
  {
2
+ "best_global_step": 117,
3
+ "best_metric": 0.6895306859205776,
4
+ "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-117",
5
  "epoch": 4.0,
6
  "eval_steps": 500,
7
  "global_step": 156,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.6462093862815884,
15
+ "eval_loss": 0.6229268908500671,
16
+ "eval_runtime": 0.6366,
17
+ "eval_samples_per_second": 435.104,
18
+ "eval_steps_per_second": 14.137,
19
  "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.6570397111913358,
24
+ "eval_loss": 0.7122572064399719,
25
+ "eval_runtime": 0.6369,
26
+ "eval_samples_per_second": 434.899,
27
+ "eval_steps_per_second": 14.13,
28
  "step": 78
29
  },
30
  {
31
  "epoch": 3.0,
32
+ "eval_accuracy": 0.6895306859205776,
33
+ "eval_loss": 0.6926298141479492,
34
+ "eval_runtime": 0.638,
35
+ "eval_samples_per_second": 434.136,
36
+ "eval_steps_per_second": 14.106,
37
  "step": 117
38
  },
39
  {
40
  "epoch": 4.0,
41
+ "eval_accuracy": 0.6678700361010831,
42
+ "eval_loss": 0.923632025718689,
43
+ "eval_runtime": 0.6386,
44
+ "eval_samples_per_second": 433.765,
45
+ "eval_steps_per_second": 14.093,
46
  "step": 156
47
  }
48
  ],
 
68
  "trial_name": null,
69
  "trial_params": {
70
  "dropout_rate": 0.0134,
71
+ "learning_rate": 0.0001,
72
  "max_length": 32,
73
  "num_train_epochs": 5,
74
  "per_device_train_batch_size": 64
run-1/checkpoint-195/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 39,
3
- "best_metric": 0.5270758122743683,
4
- "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-39",
5
  "epoch": 5.0,
6
  "eval_steps": 500,
7
  "global_step": 195,
@@ -11,47 +11,47 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.5270758122743683,
15
- "eval_loss": 0.6918498277664185,
16
- "eval_runtime": 0.6348,
17
- "eval_samples_per_second": 436.374,
18
- "eval_steps_per_second": 14.178,
19
  "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.4729241877256318,
24
- "eval_loss": 6.468245983123779,
25
- "eval_runtime": 0.638,
26
- "eval_samples_per_second": 434.168,
27
- "eval_steps_per_second": 14.107,
28
  "step": 78
29
  },
30
  {
31
  "epoch": 3.0,
32
- "eval_accuracy": 0.4729241877256318,
33
- "eval_loss": 0.7129499912261963,
34
- "eval_runtime": 0.637,
35
- "eval_samples_per_second": 434.85,
36
- "eval_steps_per_second": 14.129,
37
  "step": 117
38
  },
39
  {
40
  "epoch": 4.0,
41
- "eval_accuracy": 0.4729241877256318,
42
- "eval_loss": 0.9733805060386658,
43
- "eval_runtime": 0.6391,
44
- "eval_samples_per_second": 433.43,
45
- "eval_steps_per_second": 14.083,
46
  "step": 156
47
  },
48
  {
49
  "epoch": 5.0,
50
- "eval_accuracy": 0.4729241877256318,
51
- "eval_loss": 0.7346399426460266,
52
- "eval_runtime": 0.6373,
53
- "eval_samples_per_second": 434.654,
54
- "eval_steps_per_second": 14.122,
55
  "step": 195
56
  }
57
  ],
@@ -77,7 +77,7 @@
77
  "trial_name": null,
78
  "trial_params": {
79
  "dropout_rate": 0.0134,
80
- "learning_rate": 0.05,
81
  "max_length": 32,
82
  "num_train_epochs": 5,
83
  "per_device_train_batch_size": 64
 
1
  {
2
+ "best_global_step": 117,
3
+ "best_metric": 0.6895306859205776,
4
+ "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-117",
5
  "epoch": 5.0,
6
  "eval_steps": 500,
7
  "global_step": 195,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.6462093862815884,
15
+ "eval_loss": 0.6229268908500671,
16
+ "eval_runtime": 0.6366,
17
+ "eval_samples_per_second": 435.104,
18
+ "eval_steps_per_second": 14.137,
19
  "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.6570397111913358,
24
+ "eval_loss": 0.7122572064399719,
25
+ "eval_runtime": 0.6369,
26
+ "eval_samples_per_second": 434.899,
27
+ "eval_steps_per_second": 14.13,
28
  "step": 78
29
  },
30
  {
31
  "epoch": 3.0,
32
+ "eval_accuracy": 0.6895306859205776,
33
+ "eval_loss": 0.6926298141479492,
34
+ "eval_runtime": 0.638,
35
+ "eval_samples_per_second": 434.136,
36
+ "eval_steps_per_second": 14.106,
37
  "step": 117
38
  },
39
  {
40
  "epoch": 4.0,
41
+ "eval_accuracy": 0.6678700361010831,
42
+ "eval_loss": 0.923632025718689,
43
+ "eval_runtime": 0.6386,
44
+ "eval_samples_per_second": 433.765,
45
+ "eval_steps_per_second": 14.093,
46
  "step": 156
47
  },
48
  {
49
  "epoch": 5.0,
50
+ "eval_accuracy": 0.6787003610108303,
51
+ "eval_loss": 1.1270627975463867,
52
+ "eval_runtime": 0.6426,
53
+ "eval_samples_per_second": 431.091,
54
+ "eval_steps_per_second": 14.007,
55
  "step": 195
56
  }
57
  ],
 
77
  "trial_name": null,
78
  "trial_params": {
79
  "dropout_rate": 0.0134,
80
+ "learning_rate": 0.0001,
81
  "max_length": 32,
82
  "num_train_epochs": 5,
83
  "per_device_train_batch_size": 64
run-1/checkpoint-39/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:501c9d0db52de0ae3a0489f8234dd2aff7ece8059a7ea3877035dd4ced9cc94b
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc59468e2ba140a6f6e62e3c90e1035f190056bdc9b499e74ecade2b7fa551e7
3
  size 437958648
run-1/checkpoint-39/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dddedf4584c7e8be9e94f8272968acd37acaa267e4adc3d479a43758a38e02fc
3
  size 876038394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e92e24c71c1ca864f10dfed46043f5ec1d165eb8d2e762e99bf4ffdf75d5ea92
3
  size 876038394
run-1/checkpoint-39/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:598448aee663bd69feb15b730dcfefd83bf321a2bd53ece879436b0e2e85c889
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d99f0741d1b8c0fb2ef672037883ae1152cbbf2c3bb454d16b7df9a7ccf7f447
3
  size 1064
run-1/checkpoint-39/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 39,
3
- "best_metric": 0.5270758122743683,
4
  "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-39",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
@@ -11,11 +11,11 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.5270758122743683,
15
- "eval_loss": 0.6918498277664185,
16
- "eval_runtime": 0.6348,
17
- "eval_samples_per_second": 436.374,
18
- "eval_steps_per_second": 14.178,
19
  "step": 39
20
  }
21
  ],
@@ -41,7 +41,7 @@
41
  "trial_name": null,
42
  "trial_params": {
43
  "dropout_rate": 0.0134,
44
- "learning_rate": 0.05,
45
  "max_length": 32,
46
  "num_train_epochs": 5,
47
  "per_device_train_batch_size": 64
 
1
  {
2
  "best_global_step": 39,
3
+ "best_metric": 0.6462093862815884,
4
  "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-39",
5
  "epoch": 1.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.6462093862815884,
15
+ "eval_loss": 0.6229268908500671,
16
+ "eval_runtime": 0.6366,
17
+ "eval_samples_per_second": 435.104,
18
+ "eval_steps_per_second": 14.137,
19
  "step": 39
20
  }
21
  ],
 
41
  "trial_name": null,
42
  "trial_params": {
43
  "dropout_rate": 0.0134,
44
+ "learning_rate": 0.0001,
45
  "max_length": 32,
46
  "num_train_epochs": 5,
47
  "per_device_train_batch_size": 64
run-1/checkpoint-39/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e99cfd8ed47c6deda5dd532724592f1d8997b2921756d80eddbdaca287b3e257
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45289e81d0d437544c5e5fa7f0465860e7bb537a8d7f9f378dbccfa22dddc497
3
  size 5432
run-1/checkpoint-78/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8f0c9c3d76c0bfba182562429ed0c930aab0b272c6f424390f7e6b8a2dfa20e
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25fdea99ceb61fa6f36538a22ecbe5e6b141d77b47eced9ff8db8ba1d1865f5
3
  size 437958648
run-1/checkpoint-78/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17669738eaf34e1681623748501546276143a75b4cf564da21f7ab18308502e0
3
  size 876038394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9eaf725675e305ebee81c205aab615742a1d5d5ea0cb52c9a63839c25b9ce52
3
  size 876038394
run-1/checkpoint-78/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:562900fbc54f61683673f7d45f332e2421789e473397b69e96adb452a1719746
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb667ae3b67495afb6dd345289d7886f47c7be4d88545cef5fd8a2859941ae93
3
  size 1064
run-1/checkpoint-78/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_global_step": 39,
3
- "best_metric": 0.5270758122743683,
4
- "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-39",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
7
  "global_step": 78,
@@ -11,20 +11,20 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.5270758122743683,
15
- "eval_loss": 0.6918498277664185,
16
- "eval_runtime": 0.6348,
17
- "eval_samples_per_second": 436.374,
18
- "eval_steps_per_second": 14.178,
19
  "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.4729241877256318,
24
- "eval_loss": 6.468245983123779,
25
- "eval_runtime": 0.638,
26
- "eval_samples_per_second": 434.168,
27
- "eval_steps_per_second": 14.107,
28
  "step": 78
29
  }
30
  ],
@@ -50,7 +50,7 @@
50
  "trial_name": null,
51
  "trial_params": {
52
  "dropout_rate": 0.0134,
53
- "learning_rate": 0.05,
54
  "max_length": 32,
55
  "num_train_epochs": 5,
56
  "per_device_train_batch_size": 64
 
1
  {
2
+ "best_global_step": 78,
3
+ "best_metric": 0.6570397111913358,
4
+ "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-1/checkpoint-78",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
7
  "global_step": 78,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.6462093862815884,
15
+ "eval_loss": 0.6229268908500671,
16
+ "eval_runtime": 0.6366,
17
+ "eval_samples_per_second": 435.104,
18
+ "eval_steps_per_second": 14.137,
19
  "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.6570397111913358,
24
+ "eval_loss": 0.7122572064399719,
25
+ "eval_runtime": 0.6369,
26
+ "eval_samples_per_second": 434.899,
27
+ "eval_steps_per_second": 14.13,
28
  "step": 78
29
  }
30
  ],
 
50
  "trial_name": null,
51
  "trial_params": {
52
  "dropout_rate": 0.0134,
53
+ "learning_rate": 0.0001,
54
  "max_length": 32,
55
  "num_train_epochs": 5,
56
  "per_device_train_batch_size": 64
run-1/checkpoint-78/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e99cfd8ed47c6deda5dd532724592f1d8997b2921756d80eddbdaca287b3e257
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45289e81d0d437544c5e5fa7f0465860e7bb537a8d7f9f378dbccfa22dddc497
3
  size 5432
run-9/checkpoint-117/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
run-9/checkpoint-117/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1abed0df3e92393ff4391869d54d2c1fa20917232a92b720a9f846349fe6f100
3
+ size 437958648
run-9/checkpoint-117/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7614686ee26a6974dc0642d7f5447a7356b657ebccec6e742aff3a2e8fd48c4e
3
+ size 876038394
run-9/checkpoint-117/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:066817b2001cdf2cab3204d72b7658f8308ed56a8eab94345bd5ce0742b9b7f7
3
+ size 14244
run-9/checkpoint-117/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d3367c94eb78632ebcf0eb99adc77bf56c3333df91fbddd3c07e9e4a158dd6d
3
+ size 1064
run-9/checkpoint-117/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-9/checkpoint-117/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-9/checkpoint-117/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
run-9/checkpoint-117/trainer_state.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 39,
3
+ "best_metric": 0.5270758122743683,
4
+ "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-9/checkpoint-39",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 117,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_accuracy": 0.5270758122743683,
15
+ "eval_loss": 0.6937959790229797,
16
+ "eval_runtime": 0.6356,
17
+ "eval_samples_per_second": 435.835,
18
+ "eval_steps_per_second": 14.161,
19
+ "step": 39
20
+ },
21
+ {
22
+ "epoch": 2.0,
23
+ "eval_accuracy": 0.4729241877256318,
24
+ "eval_loss": 0.704865038394928,
25
+ "eval_runtime": 0.6385,
26
+ "eval_samples_per_second": 433.823,
27
+ "eval_steps_per_second": 14.095,
28
+ "step": 78
29
+ },
30
+ {
31
+ "epoch": 3.0,
32
+ "eval_accuracy": 0.5270758122743683,
33
+ "eval_loss": 0.7393977046012878,
34
+ "eval_runtime": 0.6383,
35
+ "eval_samples_per_second": 433.993,
36
+ "eval_steps_per_second": 14.101,
37
+ "step": 117
38
+ }
39
+ ],
40
+ "logging_steps": 500,
41
+ "max_steps": 195,
42
+ "num_input_tokens_seen": 0,
43
+ "num_train_epochs": 5,
44
+ "save_steps": 500,
45
+ "stateful_callbacks": {
46
+ "TrainerControl": {
47
+ "args": {
48
+ "should_epoch_stop": false,
49
+ "should_evaluate": false,
50
+ "should_log": false,
51
+ "should_save": true,
52
+ "should_training_stop": false
53
+ },
54
+ "attributes": {}
55
+ }
56
+ },
57
+ "total_flos": 0,
58
+ "train_batch_size": 64,
59
+ "trial_name": null,
60
+ "trial_params": {
61
+ "dropout_rate": 0.0134,
62
+ "learning_rate": 0.001,
63
+ "max_length": 32,
64
+ "num_train_epochs": 5,
65
+ "per_device_train_batch_size": 64
66
+ }
67
+ }
run-9/checkpoint-117/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a54d1e9a16102072f117ac90cba2c8de9da1d67b974b29749928b6494fcb115
3
+ size 5432
run-9/checkpoint-117/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-9/checkpoint-156/config.json CHANGED
@@ -19,7 +19,7 @@
19
  "position_embedding_type": "absolute",
20
  "problem_type": "single_label_classification",
21
  "torch_dtype": "float32",
22
- "transformers_version": "4.50.2",
23
  "type_vocab_size": 2,
24
  "use_cache": true,
25
  "vocab_size": 30522
 
19
  "position_embedding_type": "absolute",
20
  "problem_type": "single_label_classification",
21
  "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
  "type_vocab_size": 2,
24
  "use_cache": true,
25
  "vocab_size": 30522
run-9/checkpoint-156/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12b6d96d663efbc57e9542ff44865022edd9202194af887b918ca61af7f19f7c
3
  size 437958648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59b563575b736ac4b51a650209a951abec8489e61c314511a0a0d14dac5ea6ae
3
  size 437958648
run-9/checkpoint-156/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97929b38536d37457220566effb284177d8001c95496f77c9616b6bdf757d2e2
3
  size 876038394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d1bfb12ef9c7bb61c13e253328f942c92a1d08309d399091589b16dcdade709
3
  size 876038394
run-9/checkpoint-156/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4a64755083f7633da1abd1de577e641d7084f1c57535fb19884090b3171beb5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f61eb961c8bdfdb65315b87a5752740304715f4131aaf57d9e9514dcd94c88a
3
  size 14244
run-9/checkpoint-156/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bde52913dc62c559307ee5640e319a3f50de7393e9596c867af3260f119d18a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bbdd17099c17fd8973dbee7c8db3dc0750b97033702a5a917854c7124a17dfc
3
  size 1064
run-9/checkpoint-156/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_global_step": 78,
3
- "best_metric": 0.6498194945848376,
4
- "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-9/checkpoint-78",
5
- "epoch": 2.0,
6
  "eval_steps": 500,
7
  "global_step": 156,
8
  "is_hyper_param_search": true,
@@ -11,27 +11,45 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_accuracy": 0.6498194945848376,
15
- "eval_loss": 0.6421064734458923,
16
- "eval_runtime": 0.6578,
17
- "eval_samples_per_second": 421.089,
18
- "eval_steps_per_second": 7.601,
19
- "step": 78
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.6462093862815884,
24
- "eval_loss": 0.6568495035171509,
25
- "eval_runtime": 0.6565,
26
- "eval_samples_per_second": 421.952,
27
- "eval_steps_per_second": 7.616,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "step": 156
29
  }
30
  ],
31
  "logging_steps": 500,
32
- "max_steps": 234,
33
  "num_input_tokens_seen": 0,
34
- "num_train_epochs": 3,
35
  "save_steps": 500,
36
  "stateful_callbacks": {
37
  "TrainerControl": {
@@ -46,11 +64,13 @@
46
  }
47
  },
48
  "total_flos": 0,
49
- "train_batch_size": 32,
50
  "trial_name": null,
51
  "trial_params": {
52
- "learning_rate": 4.274717233172879e-05,
53
- "num_train_epochs": 3,
54
- "per_device_train_batch_size": 32
 
 
55
  }
56
  }
 
1
  {
2
+ "best_global_step": 39,
3
+ "best_metric": 0.5270758122743683,
4
+ "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-9/checkpoint-39",
5
+ "epoch": 4.0,
6
  "eval_steps": 500,
7
  "global_step": 156,
8
  "is_hyper_param_search": true,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_accuracy": 0.5270758122743683,
15
+ "eval_loss": 0.6937959790229797,
16
+ "eval_runtime": 0.6356,
17
+ "eval_samples_per_second": 435.835,
18
+ "eval_steps_per_second": 14.161,
19
+ "step": 39
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.4729241877256318,
24
+ "eval_loss": 0.704865038394928,
25
+ "eval_runtime": 0.6385,
26
+ "eval_samples_per_second": 433.823,
27
+ "eval_steps_per_second": 14.095,
28
+ "step": 78
29
+ },
30
+ {
31
+ "epoch": 3.0,
32
+ "eval_accuracy": 0.5270758122743683,
33
+ "eval_loss": 0.7393977046012878,
34
+ "eval_runtime": 0.6383,
35
+ "eval_samples_per_second": 433.993,
36
+ "eval_steps_per_second": 14.101,
37
+ "step": 117
38
+ },
39
+ {
40
+ "epoch": 4.0,
41
+ "eval_accuracy": 0.4729241877256318,
42
+ "eval_loss": 0.7269212603569031,
43
+ "eval_runtime": 0.6407,
44
+ "eval_samples_per_second": 432.331,
45
+ "eval_steps_per_second": 14.047,
46
  "step": 156
47
  }
48
  ],
49
  "logging_steps": 500,
50
+ "max_steps": 195,
51
  "num_input_tokens_seen": 0,
52
+ "num_train_epochs": 5,
53
  "save_steps": 500,
54
  "stateful_callbacks": {
55
  "TrainerControl": {
 
64
  }
65
  },
66
  "total_flos": 0,
67
+ "train_batch_size": 64,
68
  "trial_name": null,
69
  "trial_params": {
70
+ "dropout_rate": 0.0134,
71
+ "learning_rate": 0.001,
72
+ "max_length": 32,
73
+ "num_train_epochs": 5,
74
+ "per_device_train_batch_size": 64
75
  }
76
  }
run-9/checkpoint-156/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c620609d8f82ee84e735c3bd984fd4331ba2dc7eaba5332a6cbd4d79b5779119
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a54d1e9a16102072f117ac90cba2c8de9da1d67b974b29749928b6494fcb115
3
  size 5432
run-9/checkpoint-195/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
run-9/checkpoint-195/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95966654c3a0e0dc96b1a63065d98f17589e0f452c15d2ea154557e3f514d495
3
+ size 437958648
run-9/checkpoint-195/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0de55e5f097cf92a82bb46b0bd569db756d57fe4bab23954436fd487a55a393
3
+ size 876038394