Commit 2fa6ee6 by samil24 (verified)
1 parent: faa8b56

Training in progress, epoch 1

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. model.safetensors +1 -1
  2. run-10/checkpoint-117/config.json +26 -0
  3. run-10/checkpoint-117/model.safetensors +3 -0
  4. run-10/checkpoint-117/optimizer.pt +3 -0
  5. run-10/checkpoint-117/rng_state.pth +3 -0
  6. run-10/checkpoint-117/scheduler.pt +3 -0
  7. run-10/checkpoint-117/special_tokens_map.json +7 -0
  8. run-10/checkpoint-117/tokenizer.json +0 -0
  9. run-10/checkpoint-117/tokenizer_config.json +56 -0
  10. run-10/checkpoint-117/trainer_state.json +65 -0
  11. run-10/checkpoint-117/training_args.bin +3 -0
  12. run-10/checkpoint-117/vocab.txt +0 -0
  13. run-10/checkpoint-39/model.safetensors +1 -1
  14. run-10/checkpoint-39/optimizer.pt +1 -1
  15. run-10/checkpoint-39/scheduler.pt +1 -1
  16. run-10/checkpoint-39/trainer_state.json +10 -10
  17. run-10/checkpoint-39/training_args.bin +1 -1
  18. run-10/checkpoint-78/model.safetensors +1 -1
  19. run-10/checkpoint-78/optimizer.pt +1 -1
  20. run-10/checkpoint-78/scheduler.pt +1 -1
  21. run-10/checkpoint-78/trainer_state.json +14 -14
  22. run-10/checkpoint-78/training_args.bin +1 -1
  23. run-11/checkpoint-39/model.safetensors +1 -1
  24. run-11/checkpoint-39/optimizer.pt +1 -1
  25. run-11/checkpoint-39/scheduler.pt +1 -1
  26. run-11/checkpoint-39/trainer_state.json +10 -10
  27. run-11/checkpoint-39/training_args.bin +1 -1
  28. run-6/checkpoint-78/model.safetensors +1 -1
  29. run-6/checkpoint-78/optimizer.pt +1 -1
  30. run-6/checkpoint-78/scheduler.pt +1 -1
  31. run-6/checkpoint-78/trainer_state.json +16 -16
  32. run-6/checkpoint-78/training_args.bin +1 -1
  33. run-8/checkpoint-39/config.json +26 -0
  34. run-8/checkpoint-39/model.safetensors +3 -0
  35. run-8/checkpoint-39/optimizer.pt +3 -0
  36. run-8/checkpoint-39/rng_state.pth +3 -0
  37. run-8/checkpoint-39/scheduler.pt +3 -0
  38. run-8/checkpoint-39/special_tokens_map.json +7 -0
  39. run-8/checkpoint-39/tokenizer.json +0 -0
  40. run-8/checkpoint-39/tokenizer_config.json +56 -0
  41. run-8/checkpoint-39/trainer_state.json +47 -0
  42. run-8/checkpoint-39/training_args.bin +3 -0
  43. run-8/checkpoint-39/vocab.txt +0 -0
  44. runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596808.80ab07271599.1157.9 +3 -0
  45. runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596828.80ab07271599.1157.10 +3 -0
  46. runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596868.80ab07271599.1157.11 +3 -0
  47. runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596888.80ab07271599.1157.12 +3 -0
  48. runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596968.80ab07271599.1157.13 +3 -0
  49. runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743597008.80ab07271599.1157.14 +3 -0
  50. runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743597833.80ab07271599.1157.15 +3 -0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b014e1a4f3cc837453f8f6fe2122e9cbb3417ddb9c4fca05a47650227d5de336
+ oid sha256:e8b6a21919d34fea94dcddd16a04e049bd1f84af011d7d8a5d8df0ea5c2645b0
  size 437958648
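Every binary artifact in this commit is stored through Git LFS, so the diff records only the three-line pointer file: the spec version, the sha256 oid of the blob, and its size in bytes. Below is a minimal Python sketch for checking a downloaded blob against such a pointer; the local file paths are hypothetical and not part of this commit.

import hashlib

def verify_lfs_pointer(pointer_path, blob_path):
    # Parse the three "key value" lines of a Git LFS pointer file.
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    expected_oid = fields["oid"].split(":", 1)[1]   # "sha256:<hash>" -> "<hash>"
    expected_size = int(fields["size"])

    # Hash the actual blob in chunks and compare both oid and size.
    digest = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# Hypothetical paths: a saved copy of the pointer text above and the downloaded weights.
print(verify_lfs_pointer("model.safetensors.pointer", "model.safetensors"))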
run-10/checkpoint-117/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.50.2",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
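This config describes a BertForSequenceClassification head on a bert-base-uncased backbone with problem_type single_label_classification (the repository fine-tunes on RTE). As a rough sketch, assuming the checkpoint directory and its LFS-backed weights have been pulled locally and transformers >= 4.50 is installed, the checkpoint can be loaded like this:

from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

ckpt = "run-10/checkpoint-117"  # checkpoint directory added in this commit

config = AutoConfig.from_pretrained(ckpt)                       # the 26-line config above
tokenizer = AutoTokenizer.from_pretrained(ckpt)                 # BertTokenizer files below
model = AutoModelForSequenceClassification.from_pretrained(ckpt)

# Example sentence pair (RTE is a premise/hypothesis entailment task); the sentences are placeholders.
inputs = tokenizer("A premise sentence.", "A hypothesis sentence.", return_tensors="pt")
logits = model(**inputs).logits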
run-10/checkpoint-117/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c47ed6122ca7bc8e40bc3598b0017ec8907e46833e5e678991e9ee55c1237a8d
+ size 437958648
run-10/checkpoint-117/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e7f28389eb0f6f9c714e496503a34e46bc488f7a9fc16ccdf392735f34113fd7
+ size 876038394
run-10/checkpoint-117/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:066817b2001cdf2cab3204d72b7658f8308ed56a8eab94345bd5ce0742b9b7f7
+ size 14244
run-10/checkpoint-117/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3f7342538ed78d4d8f04eb05b0cb563175fb662d097c0226efb27e3a48d219ce
+ size 1064
run-10/checkpoint-117/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
run-10/checkpoint-117/tokenizer.json ADDED
The diff for this file is too large to render.
run-10/checkpoint-117/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
run-10/checkpoint-117/trainer_state.json ADDED
@@ -0,0 +1,65 @@
+ {
+   "best_global_step": 78,
+   "best_metric": 0.6534296028880866,
+   "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-10/checkpoint-78",
+   "epoch": 3.0,
+   "eval_steps": 500,
+   "global_step": 117,
+   "is_hyper_param_search": true,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.6209386281588448,
+       "eval_loss": 0.6498723030090332,
+       "eval_runtime": 0.6464,
+       "eval_samples_per_second": 428.502,
+       "eval_steps_per_second": 13.922,
+       "step": 39
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.6534296028880866,
+       "eval_loss": 0.6439489126205444,
+       "eval_runtime": 0.6414,
+       "eval_samples_per_second": 431.884,
+       "eval_steps_per_second": 14.032,
+       "step": 78
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.6462093862815884,
+       "eval_loss": 0.8346278071403503,
+       "eval_runtime": 0.6444,
+       "eval_samples_per_second": 429.867,
+       "eval_steps_per_second": 13.967,
+       "step": 117
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 585,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 15,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 0,
+   "train_batch_size": 64,
+   "trial_name": null,
+   "trial_params": {
+     "learning_rate": 4.267043632502465e-05,
+     "num_train_epochs": 15,
+     "per_device_train_batch_size": 64
+   }
+ }
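trainer_state.json carries the hyperparameter-search bookkeeping for this trial: learning_rate 4.267e-05, 15 epochs, per-device batch size 64, with the best checkpoint so far at run-10/checkpoint-78 and best_metric 0.6534 (matching the epoch-2 eval_accuracy above). A minimal sketch for pulling those fields back out, assuming the file is available locally:

import json

with open("run-10/checkpoint-117/trainer_state.json") as f:   # path taken from this commit
    state = json.load(f)

print("best checkpoint:", state["best_model_checkpoint"])
print("best metric:", state["best_metric"])
print("trial params:", state["trial_params"])

# Per-epoch evaluation history logged by the Trainer.
for entry in state["log_history"]:
    print(entry["epoch"], entry.get("eval_accuracy"), entry.get("eval_loss"))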
run-10/checkpoint-117/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ca59d4c229b54e4a43ebfb084c4a5dae6a76fa9a553c092a64615db9e9ac09fa
+ size 5432
run-10/checkpoint-117/vocab.txt ADDED
The diff for this file is too large to render.
run-10/checkpoint-39/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:33eeee34eb19b5a53b8e3f572ead86f2271e5910fe74f758fa8d0ff092e6b391
+ oid sha256:84fc77391ddef8336e458f08bf4093d625e11d76cca6683a0aa411cbc3cc50df
  size 437958648
run-10/checkpoint-39/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:86581670404de085fd9c69dbcf11308c08f5d81a46dfbd9b90b1afb7d2e70869
+ oid sha256:ef6a04f1db2026434b43c6e61098b549f30ff5b88fa0bc932c29f3114053f26a
  size 876038394
run-10/checkpoint-39/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:684eb0d3d752752a400036405eee4cb6b7ec612493b362bb0d329494abc59302
+ oid sha256:f529f4fa824518a47ae08c4e1a82b39075bffe8de5663a92b8d6ca71868752b9
  size 1064
run-10/checkpoint-39/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
  {
    "best_global_step": 39,
-   "best_metric": 0.6245487364620939,
+   "best_metric": 0.6209386281588448,
    "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-10/checkpoint-39",
    "epoch": 1.0,
    "eval_steps": 500,
@@ -11,18 +11,18 @@
    "log_history": [
      {
        "epoch": 1.0,
-       "eval_accuracy": 0.6245487364620939,
-       "eval_loss": 0.6695829629898071,
-       "eval_runtime": 0.6396,
-       "eval_samples_per_second": 433.066,
-       "eval_steps_per_second": 14.071,
+       "eval_accuracy": 0.6209386281588448,
+       "eval_loss": 0.6498723030090332,
+       "eval_runtime": 0.6464,
+       "eval_samples_per_second": 428.502,
+       "eval_steps_per_second": 13.922,
        "step": 39
      }
    ],
    "logging_steps": 500,
-   "max_steps": 78,
+   "max_steps": 585,
    "num_input_tokens_seen": 0,
-   "num_train_epochs": 2,
+   "num_train_epochs": 15,
    "save_steps": 500,
    "stateful_callbacks": {
      "TrainerControl": {
@@ -40,8 +40,8 @@
    "train_batch_size": 64,
    "trial_name": null,
    "trial_params": {
-     "learning_rate": 6.997154460264048e-05,
-     "num_train_epochs": 2,
+     "learning_rate": 4.267043632502465e-05,
+     "num_train_epochs": 15,
      "per_device_train_batch_size": 64
    }
  }
run-10/checkpoint-39/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a1fc294c6f58e4a3ec49362dc1c2e490491f57d82a68b0d40a70b7dff56d7367
+ oid sha256:ca59d4c229b54e4a43ebfb084c4a5dae6a76fa9a553c092a64615db9e9ac09fa
  size 5432
run-10/checkpoint-78/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8ab82e99ee584d427ac3d22bdf10e582c4e46f81f388a5f641550ba50c03d88b
+ oid sha256:7f7f695c888e8b4447ada63c1486357576f38717e70641c5384fcd14e988fa5f
  size 437958648
run-10/checkpoint-78/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5c5d6e72a20bd22fb62367b1f21eb81372b22339a170fa04f159267b3fbad3e8
+ oid sha256:5feb162e1c7ddf695561a3dcc5f22133ada525d3248eab2310779fc9768fd8d1
  size 876038394
run-10/checkpoint-78/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9499799bdc32cf2c4700cc8f37e01abdad401afe3dadcd1b7e046f1bf286afbb
+ oid sha256:86637fe2567c83daac0320d69dbffa725366530e20017dd87cdf8631fba49479
  size 1064
run-10/checkpoint-78/trainer_state.json CHANGED
@@ -11,27 +11,27 @@
    "log_history": [
      {
        "epoch": 1.0,
-       "eval_accuracy": 0.6245487364620939,
-       "eval_loss": 0.6695829629898071,
-       "eval_runtime": 0.6396,
-       "eval_samples_per_second": 433.066,
-       "eval_steps_per_second": 14.071,
+       "eval_accuracy": 0.6209386281588448,
+       "eval_loss": 0.6498723030090332,
+       "eval_runtime": 0.6464,
+       "eval_samples_per_second": 428.502,
+       "eval_steps_per_second": 13.922,
        "step": 39
      },
      {
        "epoch": 2.0,
        "eval_accuracy": 0.6534296028880866,
-       "eval_loss": 0.6818255186080933,
-       "eval_runtime": 0.6405,
-       "eval_samples_per_second": 432.454,
-       "eval_steps_per_second": 14.051,
+       "eval_loss": 0.6439489126205444,
+       "eval_runtime": 0.6414,
+       "eval_samples_per_second": 431.884,
+       "eval_steps_per_second": 14.032,
        "step": 78
      }
    ],
    "logging_steps": 500,
-   "max_steps": 78,
+   "max_steps": 585,
    "num_input_tokens_seen": 0,
-   "num_train_epochs": 2,
+   "num_train_epochs": 15,
    "save_steps": 500,
    "stateful_callbacks": {
      "TrainerControl": {
@@ -40,7 +40,7 @@
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
-       "should_training_stop": true
+       "should_training_stop": false
      },
      "attributes": {}
    }
@@ -49,8 +49,8 @@
    "train_batch_size": 64,
    "trial_name": null,
    "trial_params": {
-     "learning_rate": 6.997154460264048e-05,
-     "num_train_epochs": 2,
+     "learning_rate": 4.267043632502465e-05,
+     "num_train_epochs": 15,
      "per_device_train_batch_size": 64
    }
  }
run-10/checkpoint-78/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a1fc294c6f58e4a3ec49362dc1c2e490491f57d82a68b0d40a70b7dff56d7367
+ oid sha256:ca59d4c229b54e4a43ebfb084c4a5dae6a76fa9a553c092a64615db9e9ac09fa
  size 5432
run-11/checkpoint-39/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:44a3c05303709178b86495805192f65f4ec8d73a95da43768afde0c8bc82b1aa
+ oid sha256:cd42fc3b3470f245ee25b1a91c24b73c17f7cb4a9e86512bfce15bcf57a02781
  size 437958648
run-11/checkpoint-39/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:35ba8bd3a137e8c256fa83610476b26e6afa00ed9f59a03889c9f9a4de1fe707
+ oid sha256:79da32a34ab09addf9391dfc6f66924d99d858eb9c859ead19ce7c83fac90100
  size 876038394
run-11/checkpoint-39/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1465843e756b4dabb451b4d75f771f906f2c5f5e0d01000393acdd4920574819
+ oid sha256:f0c23801a5ddb22ded05ffc9d23b7e52483659cbd731822c0847adb92e8c801f
  size 1064
run-11/checkpoint-39/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
  {
    "best_global_step": 39,
-   "best_metric": 0.6137184115523465,
+   "best_metric": 0.6173285198555957,
    "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-11/checkpoint-39",
    "epoch": 1.0,
    "eval_steps": 500,
@@ -11,18 +11,18 @@
    "log_history": [
      {
        "epoch": 1.0,
-       "eval_accuracy": 0.6137184115523465,
-       "eval_loss": 0.6391859650611877,
-       "eval_runtime": 0.639,
-       "eval_samples_per_second": 433.464,
-       "eval_steps_per_second": 14.084,
+       "eval_accuracy": 0.6173285198555957,
+       "eval_loss": 0.6398611664772034,
+       "eval_runtime": 0.6423,
+       "eval_samples_per_second": 431.296,
+       "eval_steps_per_second": 14.013,
        "step": 39
      }
    ],
    "logging_steps": 500,
-   "max_steps": 78,
+   "max_steps": 585,
    "num_input_tokens_seen": 0,
-   "num_train_epochs": 2,
+   "num_train_epochs": 15,
    "save_steps": 500,
    "stateful_callbacks": {
      "TrainerControl": {
@@ -40,8 +40,8 @@
    "train_batch_size": 64,
    "trial_name": null,
    "trial_params": {
-     "learning_rate": 6.811976645197309e-05,
-     "num_train_epochs": 2,
+     "learning_rate": 6.0322373827171756e-05,
+     "num_train_epochs": 15,
      "per_device_train_batch_size": 64
    }
  }
run-11/checkpoint-39/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2ff331d460bfbfbbb0ce4eb0b36e8bba429624d138721d53eab7c2881cd497cb
+ oid sha256:1b97468c089ba6ff33fc12e4a0700ac465e7cc381a5949bbe5c24c3f27650b69
  size 5432
run-6/checkpoint-78/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1adfd71f163e454ceb956bbad0838ab316ab8b2269186a2de242e86d6f94d079
+ oid sha256:eb1ba5c8089724d0d7f4bcf542f63a7e62dd7b03fb7b0815005b3d794e25fd24
  size 437958648
run-6/checkpoint-78/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:05aabd77242bc1cf7de112b4c16bf8780e353c056b0f57e4fb10fbe56c199dc1
+ oid sha256:5c6827b667b79d074df0df65b191ef0334701a4bd9c6e21bc9f0862c0cb3ac9c
  size 876038394
run-6/checkpoint-78/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7c772fc89b86a560c39294f9e99e045616de0628fc50758bbfa4394e73dde68e
+ oid sha256:67bcf98a98ab76470c935b8832145270959789541c75629573da5ffe7cb94209
  size 1064
run-6/checkpoint-78/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
  {
    "best_global_step": 78,
-   "best_metric": 0.628158844765343,
+   "best_metric": 0.6389891696750902,
    "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-6/checkpoint-78",
    "epoch": 2.0,
    "eval_steps": 500,
@@ -11,27 +11,27 @@
    "log_history": [
      {
        "epoch": 1.0,
-       "eval_accuracy": 0.592057761732852,
-       "eval_loss": 0.6658051609992981,
-       "eval_runtime": 0.6374,
-       "eval_samples_per_second": 434.596,
-       "eval_steps_per_second": 14.12,
+       "eval_accuracy": 0.6101083032490975,
+       "eval_loss": 0.6483533978462219,
+       "eval_runtime": 0.6678,
+       "eval_samples_per_second": 414.792,
+       "eval_steps_per_second": 13.477,
        "step": 39
      },
      {
        "epoch": 2.0,
-       "eval_accuracy": 0.628158844765343,
-       "eval_loss": 0.6411819458007812,
-       "eval_runtime": 0.6383,
-       "eval_samples_per_second": 433.967,
-       "eval_steps_per_second": 14.1,
+       "eval_accuracy": 0.6389891696750902,
+       "eval_loss": 0.6526868343353271,
+       "eval_runtime": 0.6398,
+       "eval_samples_per_second": 432.973,
+       "eval_steps_per_second": 14.068,
        "step": 78
      }
    ],
    "logging_steps": 500,
-   "max_steps": 78,
+   "max_steps": 585,
    "num_input_tokens_seen": 0,
-   "num_train_epochs": 2,
+   "num_train_epochs": 15,
    "save_steps": 500,
    "stateful_callbacks": {
      "TrainerControl": {
@@ -40,7 +40,7 @@
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
-       "should_training_stop": true
+       "should_training_stop": false
      },
      "attributes": {}
    }
@@ -49,8 +49,8 @@
    "train_batch_size": 64,
    "trial_name": null,
    "trial_params": {
-     "learning_rate": 5.112284871578607e-05,
-     "num_train_epochs": 2,
+     "learning_rate": 4.797258707523021e-05,
+     "num_train_epochs": 15,
      "per_device_train_batch_size": 64
    }
  }
run-6/checkpoint-78/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b1817587903d4ced2975425ce78928c60729b26defe27d7acb18264dbabe1562
+ oid sha256:f932c25ae644f210e4526f3c1be14c7f7fe52253969f112e7a5e335bb77ad857
  size 5432
run-8/checkpoint-39/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "position_embedding_type": "absolute",
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.50.2",
+   "type_vocab_size": 2,
+   "use_cache": true,
+   "vocab_size": 30522
+ }
run-8/checkpoint-39/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4c9e956b4a26c697b232b9334cd775e3b51aed9bce2573b1e76f7dae5e4a7c6b
+ size 437958648
run-8/checkpoint-39/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:820a37fdd0077c3ce86d9024cd6fb7e3a34fae69f7d9c28732ed25e4912cfd8a
+ size 876038394
run-8/checkpoint-39/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9ce2001d6c41d462c4a530df5214c4ba6ac04088f8883ec9b91629a00a7da50d
+ size 14244
run-8/checkpoint-39/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:19d792f52af224ccafbcd7e21651118681b90d6c9cc69043551847eddb44485b
+ size 1064
run-8/checkpoint-39/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
run-8/checkpoint-39/tokenizer.json ADDED
The diff for this file is too large to render.
run-8/checkpoint-39/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
run-8/checkpoint-39/trainer_state.json ADDED
@@ -0,0 +1,47 @@
+ {
+   "best_global_step": 39,
+   "best_metric": 0.6064981949458483,
+   "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-8/checkpoint-39",
+   "epoch": 1.0,
+   "eval_steps": 500,
+   "global_step": 39,
+   "is_hyper_param_search": true,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.6064981949458483,
+       "eval_loss": 0.6455614566802979,
+       "eval_runtime": 0.6414,
+       "eval_samples_per_second": 431.878,
+       "eval_steps_per_second": 14.032,
+       "step": 39
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 585,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 15,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 0,
+   "train_batch_size": 64,
+   "trial_name": null,
+   "trial_params": {
+     "learning_rate": 5.2838273651328355e-05,
+     "num_train_epochs": 15,
+     "per_device_train_batch_size": 64
+   }
+ }
run-8/checkpoint-39/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1f9c40a0916e7974092f2fa779ef8cfb9240339680ff02e53018c1dfa9048c25
+ size 5432
run-8/checkpoint-39/vocab.txt ADDED
The diff for this file is too large to render.
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596808.80ab07271599.1157.9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c74dcbf62988c7e22aa412f5aadb89285e05980607425d5d45967f0d31e712d3
+ size 5419
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596828.80ab07271599.1157.10 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3049f8b858ee598753195ed39772875c6763010b29b910e463557107d00c9c70
+ size 5737
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596868.80ab07271599.1157.11 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f21ea402050781a5fc1e7205739b21548beae7a910ff5daf3569700937f263bf
+ size 5420
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596888.80ab07271599.1157.12 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a4e162a963f306eddcb3d58edc4ef38e2e8c39d7a3b3163435aa9d3eefb6bb8a
+ size 6376
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596968.80ab07271599.1157.13 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:26ab4deaa4cec5d624e1c37c96a35688c766f834c40ca99e777ed48ccee10e77
+ size 5737
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743597008.80ab07271599.1157.14 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:123613b55c16fb967f3fe157b40416f8dfb382adfc6cb8550d6fce811f02dfd9
+ size 10494
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743597833.80ab07271599.1157.15 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:618b5e99bf134c50583c5eeeb277df0fd50f4c888a066e9c2c6cc8bd4aaeabd7
+ size 10494
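The events.out.tfevents.* files are TensorBoard logs written during the hyperparameter-search trials (committed here as LFS pointers). A minimal sketch for inspecting them, assuming the tensorboard package is installed and the runs/ directory has been pulled locally; the exact scalar tag names depend on what the Trainer logged:

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

log_dir = "runs/Apr02_11-23-36_80ab07271599"   # log directory added in this commit
accumulator = EventAccumulator(log_dir)
accumulator.Reload()                           # parse every event file in the directory

for tag in accumulator.Tags()["scalars"]:      # tag names are whatever the Trainer logged
    for event in accumulator.Scalars(tag):
        print(tag, event.step, event.value)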