Training in progress, epoch 1
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- model.safetensors +1 -1
- run-10/checkpoint-117/config.json +26 -0
- run-10/checkpoint-117/model.safetensors +3 -0
- run-10/checkpoint-117/optimizer.pt +3 -0
- run-10/checkpoint-117/rng_state.pth +3 -0
- run-10/checkpoint-117/scheduler.pt +3 -0
- run-10/checkpoint-117/special_tokens_map.json +7 -0
- run-10/checkpoint-117/tokenizer.json +0 -0
- run-10/checkpoint-117/tokenizer_config.json +56 -0
- run-10/checkpoint-117/trainer_state.json +65 -0
- run-10/checkpoint-117/training_args.bin +3 -0
- run-10/checkpoint-117/vocab.txt +0 -0
- run-10/checkpoint-39/model.safetensors +1 -1
- run-10/checkpoint-39/optimizer.pt +1 -1
- run-10/checkpoint-39/scheduler.pt +1 -1
- run-10/checkpoint-39/trainer_state.json +10 -10
- run-10/checkpoint-39/training_args.bin +1 -1
- run-10/checkpoint-78/model.safetensors +1 -1
- run-10/checkpoint-78/optimizer.pt +1 -1
- run-10/checkpoint-78/scheduler.pt +1 -1
- run-10/checkpoint-78/trainer_state.json +14 -14
- run-10/checkpoint-78/training_args.bin +1 -1
- run-11/checkpoint-39/model.safetensors +1 -1
- run-11/checkpoint-39/optimizer.pt +1 -1
- run-11/checkpoint-39/scheduler.pt +1 -1
- run-11/checkpoint-39/trainer_state.json +10 -10
- run-11/checkpoint-39/training_args.bin +1 -1
- run-6/checkpoint-78/model.safetensors +1 -1
- run-6/checkpoint-78/optimizer.pt +1 -1
- run-6/checkpoint-78/scheduler.pt +1 -1
- run-6/checkpoint-78/trainer_state.json +16 -16
- run-6/checkpoint-78/training_args.bin +1 -1
- run-8/checkpoint-39/config.json +26 -0
- run-8/checkpoint-39/model.safetensors +3 -0
- run-8/checkpoint-39/optimizer.pt +3 -0
- run-8/checkpoint-39/rng_state.pth +3 -0
- run-8/checkpoint-39/scheduler.pt +3 -0
- run-8/checkpoint-39/special_tokens_map.json +7 -0
- run-8/checkpoint-39/tokenizer.json +0 -0
- run-8/checkpoint-39/tokenizer_config.json +56 -0
- run-8/checkpoint-39/trainer_state.json +47 -0
- run-8/checkpoint-39/training_args.bin +3 -0
- run-8/checkpoint-39/vocab.txt +0 -0
- runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596808.80ab07271599.1157.9 +3 -0
- runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596828.80ab07271599.1157.10 +3 -0
- runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596868.80ab07271599.1157.11 +3 -0
- runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596888.80ab07271599.1157.12 +3 -0
- runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596968.80ab07271599.1157.13 +3 -0
- runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743597008.80ab07271599.1157.14 +3 -0
- runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743597833.80ab07271599.1157.15 +3 -0
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 437958648
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8b6a21919d34fea94dcddd16a04e049bd1f84af011d7d8a5d8df0ea5c2645b0
|
3 |
size 437958648
|
run-10/checkpoint-117/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"BertForSequenceClassification"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"classifier_dropout": null,
|
7 |
+
"gradient_checkpointing": false,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 768,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 3072,
|
13 |
+
"layer_norm_eps": 1e-12,
|
14 |
+
"max_position_embeddings": 512,
|
15 |
+
"model_type": "bert",
|
16 |
+
"num_attention_heads": 12,
|
17 |
+
"num_hidden_layers": 12,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"position_embedding_type": "absolute",
|
20 |
+
"problem_type": "single_label_classification",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.50.2",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 30522
|
26 |
+
}
|
run-10/checkpoint-117/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c47ed6122ca7bc8e40bc3598b0017ec8907e46833e5e678991e9ee55c1237a8d
|
3 |
+
size 437958648
|
run-10/checkpoint-117/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7f28389eb0f6f9c714e496503a34e46bc488f7a9fc16ccdf392735f34113fd7
|
3 |
+
size 876038394
|
run-10/checkpoint-117/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:066817b2001cdf2cab3204d72b7658f8308ed56a8eab94345bd5ce0742b9b7f7
|
3 |
+
size 14244
|
run-10/checkpoint-117/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f7342538ed78d4d8f04eb05b0cb563175fb662d097c0226efb27e3a48d219ce
|
3 |
+
size 1064
|
run-10/checkpoint-117/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
run-10/checkpoint-117/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-10/checkpoint-117/tokenizer_config.json
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": false,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": true,
|
47 |
+
"extra_special_tokens": {},
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 512,
|
50 |
+
"pad_token": "[PAD]",
|
51 |
+
"sep_token": "[SEP]",
|
52 |
+
"strip_accents": null,
|
53 |
+
"tokenize_chinese_chars": true,
|
54 |
+
"tokenizer_class": "BertTokenizer",
|
55 |
+
"unk_token": "[UNK]"
|
56 |
+
}
|
run-10/checkpoint-117/trainer_state.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": 78,
|
3 |
+
"best_metric": 0.6534296028880866,
|
4 |
+
"best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-10/checkpoint-78",
|
5 |
+
"epoch": 3.0,
|
6 |
+
"eval_steps": 500,
|
7 |
+
"global_step": 117,
|
8 |
+
"is_hyper_param_search": true,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 1.0,
|
14 |
+
"eval_accuracy": 0.6209386281588448,
|
15 |
+
"eval_loss": 0.6498723030090332,
|
16 |
+
"eval_runtime": 0.6464,
|
17 |
+
"eval_samples_per_second": 428.502,
|
18 |
+
"eval_steps_per_second": 13.922,
|
19 |
+
"step": 39
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"epoch": 2.0,
|
23 |
+
"eval_accuracy": 0.6534296028880866,
|
24 |
+
"eval_loss": 0.6439489126205444,
|
25 |
+
"eval_runtime": 0.6414,
|
26 |
+
"eval_samples_per_second": 431.884,
|
27 |
+
"eval_steps_per_second": 14.032,
|
28 |
+
"step": 78
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 3.0,
|
32 |
+
"eval_accuracy": 0.6462093862815884,
|
33 |
+
"eval_loss": 0.8346278071403503,
|
34 |
+
"eval_runtime": 0.6444,
|
35 |
+
"eval_samples_per_second": 429.867,
|
36 |
+
"eval_steps_per_second": 13.967,
|
37 |
+
"step": 117
|
38 |
+
}
|
39 |
+
],
|
40 |
+
"logging_steps": 500,
|
41 |
+
"max_steps": 585,
|
42 |
+
"num_input_tokens_seen": 0,
|
43 |
+
"num_train_epochs": 15,
|
44 |
+
"save_steps": 500,
|
45 |
+
"stateful_callbacks": {
|
46 |
+
"TrainerControl": {
|
47 |
+
"args": {
|
48 |
+
"should_epoch_stop": false,
|
49 |
+
"should_evaluate": false,
|
50 |
+
"should_log": false,
|
51 |
+
"should_save": true,
|
52 |
+
"should_training_stop": false
|
53 |
+
},
|
54 |
+
"attributes": {}
|
55 |
+
}
|
56 |
+
},
|
57 |
+
"total_flos": 0,
|
58 |
+
"train_batch_size": 64,
|
59 |
+
"trial_name": null,
|
60 |
+
"trial_params": {
|
61 |
+
"learning_rate": 4.267043632502465e-05,
|
62 |
+
"num_train_epochs": 15,
|
63 |
+
"per_device_train_batch_size": 64
|
64 |
+
}
|
65 |
+
}
|
run-10/checkpoint-117/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca59d4c229b54e4a43ebfb084c4a5dae6a76fa9a553c092a64615db9e9ac09fa
|
3 |
+
size 5432
|
run-10/checkpoint-117/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-10/checkpoint-39/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 437958648
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84fc77391ddef8336e458f08bf4093d625e11d76cca6683a0aa411cbc3cc50df
|
3 |
size 437958648
|
run-10/checkpoint-39/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 876038394
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef6a04f1db2026434b43c6e61098b549f30ff5b88fa0bc932c29f3114053f26a
|
3 |
size 876038394
|
run-10/checkpoint-39/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f529f4fa824518a47ae08c4e1a82b39075bffe8de5663a92b8d6ca71868752b9
|
3 |
size 1064
|
run-10/checkpoint-39/trainer_state.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"best_global_step": 39,
|
3 |
-
"best_metric": 0.
|
4 |
"best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-10/checkpoint-39",
|
5 |
"epoch": 1.0,
|
6 |
"eval_steps": 500,
|
@@ -11,18 +11,18 @@
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 1.0,
|
14 |
-
"eval_accuracy": 0.
|
15 |
-
"eval_loss": 0.
|
16 |
-
"eval_runtime": 0.
|
17 |
-
"eval_samples_per_second":
|
18 |
-
"eval_steps_per_second":
|
19 |
"step": 39
|
20 |
}
|
21 |
],
|
22 |
"logging_steps": 500,
|
23 |
-
"max_steps":
|
24 |
"num_input_tokens_seen": 0,
|
25 |
-
"num_train_epochs":
|
26 |
"save_steps": 500,
|
27 |
"stateful_callbacks": {
|
28 |
"TrainerControl": {
|
@@ -40,8 +40,8 @@
|
|
40 |
"train_batch_size": 64,
|
41 |
"trial_name": null,
|
42 |
"trial_params": {
|
43 |
-
"learning_rate":
|
44 |
-
"num_train_epochs":
|
45 |
"per_device_train_batch_size": 64
|
46 |
}
|
47 |
}
|
|
|
1 |
{
|
2 |
"best_global_step": 39,
|
3 |
+
"best_metric": 0.6209386281588448,
|
4 |
"best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-10/checkpoint-39",
|
5 |
"epoch": 1.0,
|
6 |
"eval_steps": 500,
|
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 1.0,
|
14 |
+
"eval_accuracy": 0.6209386281588448,
|
15 |
+
"eval_loss": 0.6498723030090332,
|
16 |
+
"eval_runtime": 0.6464,
|
17 |
+
"eval_samples_per_second": 428.502,
|
18 |
+
"eval_steps_per_second": 13.922,
|
19 |
"step": 39
|
20 |
}
|
21 |
],
|
22 |
"logging_steps": 500,
|
23 |
+
"max_steps": 585,
|
24 |
"num_input_tokens_seen": 0,
|
25 |
+
"num_train_epochs": 15,
|
26 |
"save_steps": 500,
|
27 |
"stateful_callbacks": {
|
28 |
"TrainerControl": {
|
|
|
40 |
"train_batch_size": 64,
|
41 |
"trial_name": null,
|
42 |
"trial_params": {
|
43 |
+
"learning_rate": 4.267043632502465e-05,
|
44 |
+
"num_train_epochs": 15,
|
45 |
"per_device_train_batch_size": 64
|
46 |
}
|
47 |
}
|
run-10/checkpoint-39/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca59d4c229b54e4a43ebfb084c4a5dae6a76fa9a553c092a64615db9e9ac09fa
|
3 |
size 5432
|
run-10/checkpoint-78/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 437958648
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f7f695c888e8b4447ada63c1486357576f38717e70641c5384fcd14e988fa5f
|
3 |
size 437958648
|
run-10/checkpoint-78/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 876038394
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5feb162e1c7ddf695561a3dcc5f22133ada525d3248eab2310779fc9768fd8d1
|
3 |
size 876038394
|
run-10/checkpoint-78/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86637fe2567c83daac0320d69dbffa725366530e20017dd87cdf8631fba49479
|
3 |
size 1064
|
run-10/checkpoint-78/trainer_state.json
CHANGED
@@ -11,27 +11,27 @@
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 1.0,
|
14 |
-
"eval_accuracy": 0.
|
15 |
-
"eval_loss": 0.
|
16 |
-
"eval_runtime": 0.
|
17 |
-
"eval_samples_per_second":
|
18 |
-
"eval_steps_per_second":
|
19 |
"step": 39
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.0,
|
23 |
"eval_accuracy": 0.6534296028880866,
|
24 |
-
"eval_loss": 0.
|
25 |
-
"eval_runtime": 0.
|
26 |
-
"eval_samples_per_second":
|
27 |
-
"eval_steps_per_second": 14.
|
28 |
"step": 78
|
29 |
}
|
30 |
],
|
31 |
"logging_steps": 500,
|
32 |
-
"max_steps":
|
33 |
"num_input_tokens_seen": 0,
|
34 |
-
"num_train_epochs":
|
35 |
"save_steps": 500,
|
36 |
"stateful_callbacks": {
|
37 |
"TrainerControl": {
|
@@ -40,7 +40,7 @@
|
|
40 |
"should_evaluate": false,
|
41 |
"should_log": false,
|
42 |
"should_save": true,
|
43 |
-
"should_training_stop":
|
44 |
},
|
45 |
"attributes": {}
|
46 |
}
|
@@ -49,8 +49,8 @@
|
|
49 |
"train_batch_size": 64,
|
50 |
"trial_name": null,
|
51 |
"trial_params": {
|
52 |
-
"learning_rate":
|
53 |
-
"num_train_epochs":
|
54 |
"per_device_train_batch_size": 64
|
55 |
}
|
56 |
}
|
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 1.0,
|
14 |
+
"eval_accuracy": 0.6209386281588448,
|
15 |
+
"eval_loss": 0.6498723030090332,
|
16 |
+
"eval_runtime": 0.6464,
|
17 |
+
"eval_samples_per_second": 428.502,
|
18 |
+
"eval_steps_per_second": 13.922,
|
19 |
"step": 39
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.0,
|
23 |
"eval_accuracy": 0.6534296028880866,
|
24 |
+
"eval_loss": 0.6439489126205444,
|
25 |
+
"eval_runtime": 0.6414,
|
26 |
+
"eval_samples_per_second": 431.884,
|
27 |
+
"eval_steps_per_second": 14.032,
|
28 |
"step": 78
|
29 |
}
|
30 |
],
|
31 |
"logging_steps": 500,
|
32 |
+
"max_steps": 585,
|
33 |
"num_input_tokens_seen": 0,
|
34 |
+
"num_train_epochs": 15,
|
35 |
"save_steps": 500,
|
36 |
"stateful_callbacks": {
|
37 |
"TrainerControl": {
|
|
|
40 |
"should_evaluate": false,
|
41 |
"should_log": false,
|
42 |
"should_save": true,
|
43 |
+
"should_training_stop": false
|
44 |
},
|
45 |
"attributes": {}
|
46 |
}
|
|
|
49 |
"train_batch_size": 64,
|
50 |
"trial_name": null,
|
51 |
"trial_params": {
|
52 |
+
"learning_rate": 4.267043632502465e-05,
|
53 |
+
"num_train_epochs": 15,
|
54 |
"per_device_train_batch_size": 64
|
55 |
}
|
56 |
}
|
run-10/checkpoint-78/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca59d4c229b54e4a43ebfb084c4a5dae6a76fa9a553c092a64615db9e9ac09fa
|
3 |
size 5432
|
run-11/checkpoint-39/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 437958648
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd42fc3b3470f245ee25b1a91c24b73c17f7cb4a9e86512bfce15bcf57a02781
|
3 |
size 437958648
|
run-11/checkpoint-39/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 876038394
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79da32a34ab09addf9391dfc6f66924d99d858eb9c859ead19ce7c83fac90100
|
3 |
size 876038394
|
run-11/checkpoint-39/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0c23801a5ddb22ded05ffc9d23b7e52483659cbd731822c0847adb92e8c801f
|
3 |
size 1064
|
run-11/checkpoint-39/trainer_state.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"best_global_step": 39,
|
3 |
-
"best_metric": 0.
|
4 |
"best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-11/checkpoint-39",
|
5 |
"epoch": 1.0,
|
6 |
"eval_steps": 500,
|
@@ -11,18 +11,18 @@
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 1.0,
|
14 |
-
"eval_accuracy": 0.
|
15 |
-
"eval_loss": 0.
|
16 |
-
"eval_runtime": 0.
|
17 |
-
"eval_samples_per_second":
|
18 |
-
"eval_steps_per_second": 14.
|
19 |
"step": 39
|
20 |
}
|
21 |
],
|
22 |
"logging_steps": 500,
|
23 |
-
"max_steps":
|
24 |
"num_input_tokens_seen": 0,
|
25 |
-
"num_train_epochs":
|
26 |
"save_steps": 500,
|
27 |
"stateful_callbacks": {
|
28 |
"TrainerControl": {
|
@@ -40,8 +40,8 @@
|
|
40 |
"train_batch_size": 64,
|
41 |
"trial_name": null,
|
42 |
"trial_params": {
|
43 |
-
"learning_rate": 6.
|
44 |
-
"num_train_epochs":
|
45 |
"per_device_train_batch_size": 64
|
46 |
}
|
47 |
}
|
|
|
1 |
{
|
2 |
"best_global_step": 39,
|
3 |
+
"best_metric": 0.6173285198555957,
|
4 |
"best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-11/checkpoint-39",
|
5 |
"epoch": 1.0,
|
6 |
"eval_steps": 500,
|
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 1.0,
|
14 |
+
"eval_accuracy": 0.6173285198555957,
|
15 |
+
"eval_loss": 0.6398611664772034,
|
16 |
+
"eval_runtime": 0.6423,
|
17 |
+
"eval_samples_per_second": 431.296,
|
18 |
+
"eval_steps_per_second": 14.013,
|
19 |
"step": 39
|
20 |
}
|
21 |
],
|
22 |
"logging_steps": 500,
|
23 |
+
"max_steps": 585,
|
24 |
"num_input_tokens_seen": 0,
|
25 |
+
"num_train_epochs": 15,
|
26 |
"save_steps": 500,
|
27 |
"stateful_callbacks": {
|
28 |
"TrainerControl": {
|
|
|
40 |
"train_batch_size": 64,
|
41 |
"trial_name": null,
|
42 |
"trial_params": {
|
43 |
+
"learning_rate": 6.0322373827171756e-05,
|
44 |
+
"num_train_epochs": 15,
|
45 |
"per_device_train_batch_size": 64
|
46 |
}
|
47 |
}
|
run-11/checkpoint-39/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b97468c089ba6ff33fc12e4a0700ac465e7cc381a5949bbe5c24c3f27650b69
|
3 |
size 5432
|
run-6/checkpoint-78/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 437958648
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb1ba5c8089724d0d7f4bcf542f63a7e62dd7b03fb7b0815005b3d794e25fd24
|
3 |
size 437958648
|
run-6/checkpoint-78/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 876038394
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c6827b667b79d074df0df65b191ef0334701a4bd9c6e21bc9f0862c0cb3ac9c
|
3 |
size 876038394
|
run-6/checkpoint-78/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67bcf98a98ab76470c935b8832145270959789541c75629573da5ffe7cb94209
|
3 |
size 1064
|
run-6/checkpoint-78/trainer_state.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"best_global_step": 78,
|
3 |
-
"best_metric": 0.
|
4 |
"best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-6/checkpoint-78",
|
5 |
"epoch": 2.0,
|
6 |
"eval_steps": 500,
|
@@ -11,27 +11,27 @@
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 1.0,
|
14 |
-
"eval_accuracy": 0.
|
15 |
-
"eval_loss": 0.
|
16 |
-
"eval_runtime": 0.
|
17 |
-
"eval_samples_per_second":
|
18 |
-
"eval_steps_per_second":
|
19 |
"step": 39
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.0,
|
23 |
-
"eval_accuracy": 0.
|
24 |
-
"eval_loss": 0.
|
25 |
-
"eval_runtime": 0.
|
26 |
-
"eval_samples_per_second":
|
27 |
-
"eval_steps_per_second": 14.
|
28 |
"step": 78
|
29 |
}
|
30 |
],
|
31 |
"logging_steps": 500,
|
32 |
-
"max_steps":
|
33 |
"num_input_tokens_seen": 0,
|
34 |
-
"num_train_epochs":
|
35 |
"save_steps": 500,
|
36 |
"stateful_callbacks": {
|
37 |
"TrainerControl": {
|
@@ -40,7 +40,7 @@
|
|
40 |
"should_evaluate": false,
|
41 |
"should_log": false,
|
42 |
"should_save": true,
|
43 |
-
"should_training_stop":
|
44 |
},
|
45 |
"attributes": {}
|
46 |
}
|
@@ -49,8 +49,8 @@
|
|
49 |
"train_batch_size": 64,
|
50 |
"trial_name": null,
|
51 |
"trial_params": {
|
52 |
-
"learning_rate":
|
53 |
-
"num_train_epochs":
|
54 |
"per_device_train_batch_size": 64
|
55 |
}
|
56 |
}
|
|
|
1 |
{
|
2 |
"best_global_step": 78,
|
3 |
+
"best_metric": 0.6389891696750902,
|
4 |
"best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-6/checkpoint-78",
|
5 |
"epoch": 2.0,
|
6 |
"eval_steps": 500,
|
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 1.0,
|
14 |
+
"eval_accuracy": 0.6101083032490975,
|
15 |
+
"eval_loss": 0.6483533978462219,
|
16 |
+
"eval_runtime": 0.6678,
|
17 |
+
"eval_samples_per_second": 414.792,
|
18 |
+
"eval_steps_per_second": 13.477,
|
19 |
"step": 39
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.0,
|
23 |
+
"eval_accuracy": 0.6389891696750902,
|
24 |
+
"eval_loss": 0.6526868343353271,
|
25 |
+
"eval_runtime": 0.6398,
|
26 |
+
"eval_samples_per_second": 432.973,
|
27 |
+
"eval_steps_per_second": 14.068,
|
28 |
"step": 78
|
29 |
}
|
30 |
],
|
31 |
"logging_steps": 500,
|
32 |
+
"max_steps": 585,
|
33 |
"num_input_tokens_seen": 0,
|
34 |
+
"num_train_epochs": 15,
|
35 |
"save_steps": 500,
|
36 |
"stateful_callbacks": {
|
37 |
"TrainerControl": {
|
|
|
40 |
"should_evaluate": false,
|
41 |
"should_log": false,
|
42 |
"should_save": true,
|
43 |
+
"should_training_stop": false
|
44 |
},
|
45 |
"attributes": {}
|
46 |
}
|
|
|
49 |
"train_batch_size": 64,
|
50 |
"trial_name": null,
|
51 |
"trial_params": {
|
52 |
+
"learning_rate": 4.797258707523021e-05,
|
53 |
+
"num_train_epochs": 15,
|
54 |
"per_device_train_batch_size": 64
|
55 |
}
|
56 |
}
|
run-6/checkpoint-78/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f932c25ae644f210e4526f3c1be14c7f7fe52253969f112e7a5e335bb77ad857
|
3 |
size 5432
|
run-8/checkpoint-39/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"BertForSequenceClassification"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"classifier_dropout": null,
|
7 |
+
"gradient_checkpointing": false,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 768,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 3072,
|
13 |
+
"layer_norm_eps": 1e-12,
|
14 |
+
"max_position_embeddings": 512,
|
15 |
+
"model_type": "bert",
|
16 |
+
"num_attention_heads": 12,
|
17 |
+
"num_hidden_layers": 12,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"position_embedding_type": "absolute",
|
20 |
+
"problem_type": "single_label_classification",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.50.2",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 30522
|
26 |
+
}
|
run-8/checkpoint-39/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c9e956b4a26c697b232b9334cd775e3b51aed9bce2573b1e76f7dae5e4a7c6b
|
3 |
+
size 437958648
|
run-8/checkpoint-39/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:820a37fdd0077c3ce86d9024cd6fb7e3a34fae69f7d9c28732ed25e4912cfd8a
|
3 |
+
size 876038394
|
run-8/checkpoint-39/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ce2001d6c41d462c4a530df5214c4ba6ac04088f8883ec9b91629a00a7da50d
|
3 |
+
size 14244
|
run-8/checkpoint-39/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19d792f52af224ccafbcd7e21651118681b90d6c9cc69043551847eddb44485b
|
3 |
+
size 1064
|
run-8/checkpoint-39/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
run-8/checkpoint-39/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-8/checkpoint-39/tokenizer_config.json
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": false,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": true,
|
47 |
+
"extra_special_tokens": {},
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"model_max_length": 512,
|
50 |
+
"pad_token": "[PAD]",
|
51 |
+
"sep_token": "[SEP]",
|
52 |
+
"strip_accents": null,
|
53 |
+
"tokenize_chinese_chars": true,
|
54 |
+
"tokenizer_class": "BertTokenizer",
|
55 |
+
"unk_token": "[UNK]"
|
56 |
+
}
|
run-8/checkpoint-39/trainer_state.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": 39,
|
3 |
+
"best_metric": 0.6064981949458483,
|
4 |
+
"best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_3/run-8/checkpoint-39",
|
5 |
+
"epoch": 1.0,
|
6 |
+
"eval_steps": 500,
|
7 |
+
"global_step": 39,
|
8 |
+
"is_hyper_param_search": true,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 1.0,
|
14 |
+
"eval_accuracy": 0.6064981949458483,
|
15 |
+
"eval_loss": 0.6455614566802979,
|
16 |
+
"eval_runtime": 0.6414,
|
17 |
+
"eval_samples_per_second": 431.878,
|
18 |
+
"eval_steps_per_second": 14.032,
|
19 |
+
"step": 39
|
20 |
+
}
|
21 |
+
],
|
22 |
+
"logging_steps": 500,
|
23 |
+
"max_steps": 585,
|
24 |
+
"num_input_tokens_seen": 0,
|
25 |
+
"num_train_epochs": 15,
|
26 |
+
"save_steps": 500,
|
27 |
+
"stateful_callbacks": {
|
28 |
+
"TrainerControl": {
|
29 |
+
"args": {
|
30 |
+
"should_epoch_stop": false,
|
31 |
+
"should_evaluate": false,
|
32 |
+
"should_log": false,
|
33 |
+
"should_save": true,
|
34 |
+
"should_training_stop": false
|
35 |
+
},
|
36 |
+
"attributes": {}
|
37 |
+
}
|
38 |
+
},
|
39 |
+
"total_flos": 0,
|
40 |
+
"train_batch_size": 64,
|
41 |
+
"trial_name": null,
|
42 |
+
"trial_params": {
|
43 |
+
"learning_rate": 5.2838273651328355e-05,
|
44 |
+
"num_train_epochs": 15,
|
45 |
+
"per_device_train_batch_size": 64
|
46 |
+
}
|
47 |
+
}
|
run-8/checkpoint-39/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f9c40a0916e7974092f2fa779ef8cfb9240339680ff02e53018c1dfa9048c25
|
3 |
+
size 5432
|
run-8/checkpoint-39/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596808.80ab07271599.1157.9
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c74dcbf62988c7e22aa412f5aadb89285e05980607425d5d45967f0d31e712d3
|
3 |
+
size 5419
|
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596828.80ab07271599.1157.10
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3049f8b858ee598753195ed39772875c6763010b29b910e463557107d00c9c70
|
3 |
+
size 5737
|
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596868.80ab07271599.1157.11
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f21ea402050781a5fc1e7205739b21548beae7a910ff5daf3569700937f263bf
|
3 |
+
size 5420
|
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596888.80ab07271599.1157.12
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4e162a963f306eddcb3d58edc4ef38e2e8c39d7a3b3163435aa9d3eefb6bb8a
|
3 |
+
size 6376
|
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743596968.80ab07271599.1157.13
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26ab4deaa4cec5d624e1c37c96a35688c766f834c40ca99e777ed48ccee10e77
|
3 |
+
size 5737
|
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743597008.80ab07271599.1157.14
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:123613b55c16fb967f3fe157b40416f8dfb382adfc6cb8550d6fce811f02dfd9
|
3 |
+
size 10494
|
runs/Apr02_11-23-36_80ab07271599/events.out.tfevents.1743597833.80ab07271599.1157.15
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:618b5e99bf134c50583c5eeeb277df0fd50f4c888a066e9c2c6cc8bd4aaeabd7
|
3 |
+
size 10494
|