kaantureyyen committed
Commit 1e2d873 · verified · 1 Parent(s): e97572b

Upload 7 files

Files changed (7)
  1. config.json +59 -0
  2. model.safetensors +3 -0
  3. optimizer.pt +3 -0
  4. rng_state.pth +3 -0
  5. scheduler.pt +3 -0
  6. trainer_state.json +142 -0
  7. training_args.bin +3 -0
config.json ADDED
@@ -0,0 +1,59 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-small",
+   "architectures": [
+     "DebertaV2ForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2",
+     "3": "LABEL_3",
+     "4": "LABEL_4",
+     "5": "LABEL_5",
+     "6": "LABEL_6",
+     "7": "LABEL_7",
+     "8": "LABEL_8",
+     "9": "LABEL_9"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2,
+     "LABEL_3": 3,
+     "LABEL_4": 4,
+     "LABEL_5": 5,
+     "LABEL_6": 6,
+     "LABEL_7": 7,
+     "LABEL_8": 8,
+     "LABEL_9": 9
+   },
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 6,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 768,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.46.2",
+   "type_vocab_size": 0,
+   "vocab_size": 128100
+ }
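config.json describes a 6-layer DeBERTa-v3-small encoder with a 10-way sequence-classification head; the labels are the generic LABEL_0–LABEL_9 placeholders, and, per the checkpoint path recorded in trainer_state.json, the task appears to be 10-class blog-authorship attribution. A minimal loading sketch, assuming the uploaded files sit in a local directory (the commit page does not name the Hub repository) and that the stock microsoft/deberta-v3-small tokenizer was used, since no tokenizer files are part of this upload:

```python
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

# Assumption: the seven files from this commit live in ./checkpoint-1875
# (substitute the actual Hub repo id if loading straight from the Hub).
checkpoint_dir = "./checkpoint-1875"

config = AutoConfig.from_pretrained(checkpoint_dir)
print(config.model_type)   # "deberta-v2"
print(config.num_labels)   # 10, derived from the id2label map above

model = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)

# Tokenizer files are not in this commit, so fall back to the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-small")

inputs = tokenizer("an example blog post", return_tensors="pt")
logits = model(**inputs).logits            # shape: (1, 10)
print(config.id2label[logits.argmax(-1).item()])
```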
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a54311afd6d219bfecfcba55d136e089b53f404c2f90bfc87509a6f523d415c
+ size 567623168
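model.safetensors (like the other binary files in this commit) is stored through Git LFS, so the repository itself only holds this small pointer; the real 567,623,168-byte weights file lives in LFS storage and is resolved on download. A sketch of fetching the file and checking it against the sha256 oid recorded above (the repo id is a placeholder, since the commit page does not name the repository):

```python
import hashlib
from huggingface_hub import hf_hub_download

repo_id = "<namespace>/<repo>"  # placeholder: fill in the actual Hub repository

# hf_hub_download resolves the LFS pointer and returns a local cache path.
path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")

# Recompute the sha256 and compare it with the oid from the pointer file.
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
print(sha.hexdigest() == "6a54311afd6d219bfecfcba55d136e089b53f404c2f90bfc87509a6f523d415c")
```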
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b985cb3a10f79c653a17edbc7b235361f7d19f577e4c82aeebc607ed76e9d7e
+ size 1135309626
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:78f7bc43edb1b7c5e83fa99a8129c4e05319e718918d3b2f0ec31e2658674ce1
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7b1976b00ccea2a466a1aee95f120917a46e9b6544b70403ed75ea1ef84239b6
+ size 1064
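optimizer.pt, scheduler.pt, and rng_state.pth make this a full Trainer checkpoint rather than inference weights alone: together with trainer_state.json they let trainer.train(resume_from_checkpoint=...) restore the AdamW moments, the learning-rate schedule position, and the RNG state exactly. A quick inspection sketch, assuming the files sit in the working directory (they are pickled with torch.save, so load them only from a source you trust):

```python
import torch

# weights_only=False is required because these files hold more than raw tensors;
# only do this for checkpoints you trust.
optim_state = torch.load("optimizer.pt", map_location="cpu", weights_only=False)
sched_state = torch.load("scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("rng_state.pth", map_location="cpu", weights_only=False)

print(optim_state.keys())   # AdamW state dict: "state" and "param_groups"
print(sched_state)          # LR scheduler state, e.g. last_epoch / _last_lr
print(rng_state.keys())     # typically "python", "numpy", "cpu" (and "cuda" if used)
```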
trainer_state.json ADDED
@@ -0,0 +1,142 @@
+ {
+   "best_metric": 0.6359876614042939,
+   "best_model_checkpoint": "model_checkpoints/fine-tuned_blog-authorship-attribution/microsoft/deberta-v3-small/checkpoint-1875",
+   "epoch": 5.0,
+   "eval_steps": 500,
+   "global_step": 1875,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "grad_norm": 35.22766876220703,
+       "learning_rate": 4.0106666666666673e-05,
+       "loss": 1.6036,
+       "step": 375
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.555,
+       "eval_loss": 1.1462026834487915,
+       "eval_macro_f1": 0.5353708602721241,
+       "eval_macro_precision": 0.5497714189118279,
+       "eval_macro_recall": 0.5549999999999999,
+       "eval_micro_f1": 0.555,
+       "eval_runtime": 282.8346,
+       "eval_samples_per_second": 7.071,
+       "eval_steps_per_second": 0.884,
+       "step": 375
+     },
+     {
+       "epoch": 2.0,
+       "grad_norm": 39.075862884521484,
+       "learning_rate": 3.0106666666666668e-05,
+       "loss": 0.9511,
+       "step": 750
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.6065,
+       "eval_loss": 0.9465224742889404,
+       "eval_macro_f1": 0.5896716169178771,
+       "eval_macro_precision": 0.5985288261029209,
+       "eval_macro_recall": 0.6065,
+       "eval_micro_f1": 0.6065,
+       "eval_runtime": 282.8871,
+       "eval_samples_per_second": 7.07,
+       "eval_steps_per_second": 0.884,
+       "step": 750
+     },
+     {
+       "epoch": 3.0,
+       "grad_norm": 23.99466323852539,
+       "learning_rate": 2.010666666666667e-05,
+       "loss": 0.6326,
+       "step": 1125
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.6185,
+       "eval_loss": 0.9319701194763184,
+       "eval_macro_f1": 0.6044739785737623,
+       "eval_macro_precision": 0.6157976975878152,
+       "eval_macro_recall": 0.6184999999999998,
+       "eval_micro_f1": 0.6185,
+       "eval_runtime": 282.892,
+       "eval_samples_per_second": 7.07,
+       "eval_steps_per_second": 0.884,
+       "step": 1125
+     },
+     {
+       "epoch": 4.0,
+       "grad_norm": 13.676905632019043,
+       "learning_rate": 1.0106666666666668e-05,
+       "loss": 0.4675,
+       "step": 1500
+     },
+     {
+       "epoch": 4.0,
+       "eval_accuracy": 0.6415,
+       "eval_loss": 0.9401471614837646,
+       "eval_macro_f1": 0.6023451748227971,
+       "eval_macro_precision": 0.5992999443077366,
+       "eval_macro_recall": 0.6415000000000001,
+       "eval_micro_f1": 0.6415,
+       "eval_runtime": 282.864,
+       "eval_samples_per_second": 7.071,
+       "eval_steps_per_second": 0.884,
+       "step": 1500
+     },
+     {
+       "epoch": 5.0,
+       "grad_norm": 67.77677917480469,
+       "learning_rate": 1.0666666666666668e-07,
+       "loss": 0.3708,
+       "step": 1875
+     },
+     {
+       "epoch": 5.0,
+       "eval_accuracy": 0.639,
+       "eval_loss": 0.9551867842674255,
+       "eval_macro_f1": 0.6359876614042939,
+       "eval_macro_precision": 0.6469646011112227,
+       "eval_macro_recall": 0.639,
+       "eval_micro_f1": 0.639,
+       "eval_runtime": 282.9465,
+       "eval_samples_per_second": 7.068,
+       "eval_steps_per_second": 0.884,
+       "step": 1875
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 1875,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 5,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "EarlyStoppingCallback": {
+       "args": {
+         "early_stopping_patience": 2,
+         "early_stopping_threshold": 0.01
+       },
+       "attributes": {
+         "early_stopping_patience_counter": 0
+       }
+     },
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 3974730485760000.0,
+   "train_batch_size": 4,
+   "trial_name": null,
+   "trial_params": null
+ }
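trainer_state.json records one training entry and one evaluation entry per epoch, with eval_macro_f1 tracked as best_metric; the EarlyStoppingCallback (patience 2, threshold 0.01) never triggered, and the run ended at max_steps = 1875 after 5 epochs. A short sketch of pulling the evaluation curve back out of this file, assuming it sits in the working directory:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the evaluation entries from log_history.
evals = [e for e in state["log_history"] if "eval_macro_f1" in e]
for e in evals:
    print(f"epoch {e['epoch']:.0f}: acc={e['eval_accuracy']:.4f}  "
          f"macro_f1={e['eval_macro_f1']:.4f}  loss={e['eval_loss']:.4f}")

best = max(evals, key=lambda e: e["eval_macro_f1"])
print("best macro-F1:", best["eval_macro_f1"], "at step", best["step"])
# The epoch-5 checkpoint (step 1875) wins on macro-F1 (0.636) even though
# epoch 4 has the higher accuracy (0.6415), which is why best_model_checkpoint
# points at checkpoint-1875.
```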
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3892c9f121b8fd4f22f7712c581e8d07c3a1cdb564a04cdb9aba454a7433bdf9
+ size 5304
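training_args.bin is the pickled TrainingArguments object the run was launched with, saved via torch.save. A sketch of inspecting it, assuming the file is local and trusted (unpickling can execute arbitrary code, and transformers must be installed so pickle can resolve the class):

```python
import torch

# weights_only=False is required: this is a full pickled Python object, not tensors.
args = torch.load("training_args.bin", map_location="cpu", weights_only=False)

print(type(args).__name__)               # TrainingArguments
print(args.num_train_epochs)             # should match trainer_state.json (5)
print(args.per_device_train_batch_size)  # train_batch_size above suggests 4
print(args.learning_rate)
```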