Delete checkpoint-330
Browse files- checkpoint-330/config.json +0 -119
- checkpoint-330/model.safetensors +0 -3
- checkpoint-330/optimizer.pt +0 -3
- checkpoint-330/rng_state.pth +0 -3
- checkpoint-330/scheduler.pt +0 -3
- checkpoint-330/trainer_state.json +0 -175
- checkpoint-330/training_args.bin +0 -3
checkpoint-330/config.json
DELETED
@@ -1,119 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"architectures": [
|
3 |
-
"ModernBertForTokenClassification"
|
4 |
-
],
|
5 |
-
"attention_bias": false,
|
6 |
-
"attention_dropout": 0.0,
|
7 |
-
"bos_token_id": 50281,
|
8 |
-
"classifier_activation": "gelu",
|
9 |
-
"classifier_bias": false,
|
10 |
-
"classifier_dropout": 0.0,
|
11 |
-
"classifier_pooling": "mean",
|
12 |
-
"cls_token_id": 50281,
|
13 |
-
"decoder_bias": true,
|
14 |
-
"deterministic_flash_attn": false,
|
15 |
-
"embedding_dropout": 0.0,
|
16 |
-
"eos_token_id": 50282,
|
17 |
-
"global_attn_every_n_layers": 3,
|
18 |
-
"global_rope_theta": 160000.0,
|
19 |
-
"gradient_checkpointing": false,
|
20 |
-
"hidden_activation": "gelu",
|
21 |
-
"hidden_size": 768,
|
22 |
-
"id2label": {
|
23 |
-
"0": "O",
|
24 |
-
"1": "B-AGE",
|
25 |
-
"2": "I-AGE",
|
26 |
-
"3": "B-CREDIT_CARD",
|
27 |
-
"4": "I-CREDIT_CARD",
|
28 |
-
"5": "B-DATE_TIME",
|
29 |
-
"6": "I-DATE_TIME",
|
30 |
-
"7": "B-DOMAIN_NAME",
|
31 |
-
"8": "I-DOMAIN_NAME",
|
32 |
-
"9": "B-EMAIL_ADDRESS",
|
33 |
-
"10": "I-EMAIL_ADDRESS",
|
34 |
-
"11": "B-GPE",
|
35 |
-
"12": "I-GPE",
|
36 |
-
"13": "B-IBAN_CODE",
|
37 |
-
"14": "I-IBAN_CODE",
|
38 |
-
"15": "B-IP_ADDRESS",
|
39 |
-
"16": "I-IP_ADDRESS",
|
40 |
-
"17": "B-NRP",
|
41 |
-
"18": "I-NRP",
|
42 |
-
"19": "B-ORGANIZATION",
|
43 |
-
"20": "I-ORGANIZATION",
|
44 |
-
"21": "B-PERSON",
|
45 |
-
"22": "I-PERSON",
|
46 |
-
"23": "B-PHONE_NUMBER",
|
47 |
-
"24": "I-PHONE_NUMBER",
|
48 |
-
"25": "B-STREET_ADDRESS",
|
49 |
-
"26": "I-STREET_ADDRESS",
|
50 |
-
"27": "B-TITLE",
|
51 |
-
"28": "I-TITLE",
|
52 |
-
"29": "B-US_DRIVER_LICENSE",
|
53 |
-
"30": "I-US_DRIVER_LICENSE",
|
54 |
-
"31": "B-US_SSN",
|
55 |
-
"32": "I-US_SSN",
|
56 |
-
"33": "B-ZIP_CODE",
|
57 |
-
"34": "I-ZIP_CODE"
|
58 |
-
},
|
59 |
-
"initializer_cutoff_factor": 2.0,
|
60 |
-
"initializer_range": 0.02,
|
61 |
-
"intermediate_size": 1152,
|
62 |
-
"label2id": {
|
63 |
-
"B-AGE": 1,
|
64 |
-
"B-CREDIT_CARD": 3,
|
65 |
-
"B-DATE_TIME": 5,
|
66 |
-
"B-DOMAIN_NAME": 7,
|
67 |
-
"B-EMAIL_ADDRESS": 9,
|
68 |
-
"B-GPE": 11,
|
69 |
-
"B-IBAN_CODE": 13,
|
70 |
-
"B-IP_ADDRESS": 15,
|
71 |
-
"B-NRP": 17,
|
72 |
-
"B-ORGANIZATION": 19,
|
73 |
-
"B-PERSON": 21,
|
74 |
-
"B-PHONE_NUMBER": 23,
|
75 |
-
"B-STREET_ADDRESS": 25,
|
76 |
-
"B-TITLE": 27,
|
77 |
-
"B-US_DRIVER_LICENSE": 29,
|
78 |
-
"B-US_SSN": 31,
|
79 |
-
"B-ZIP_CODE": 33,
|
80 |
-
"I-AGE": 2,
|
81 |
-
"I-CREDIT_CARD": 4,
|
82 |
-
"I-DATE_TIME": 6,
|
83 |
-
"I-DOMAIN_NAME": 8,
|
84 |
-
"I-EMAIL_ADDRESS": 10,
|
85 |
-
"I-GPE": 12,
|
86 |
-
"I-IBAN_CODE": 14,
|
87 |
-
"I-IP_ADDRESS": 16,
|
88 |
-
"I-NRP": 18,
|
89 |
-
"I-ORGANIZATION": 20,
|
90 |
-
"I-PERSON": 22,
|
91 |
-
"I-PHONE_NUMBER": 24,
|
92 |
-
"I-STREET_ADDRESS": 26,
|
93 |
-
"I-TITLE": 28,
|
94 |
-
"I-US_DRIVER_LICENSE": 30,
|
95 |
-
"I-US_SSN": 32,
|
96 |
-
"I-ZIP_CODE": 34,
|
97 |
-
"O": 0
|
98 |
-
},
|
99 |
-
"layer_norm_eps": 1e-05,
|
100 |
-
"local_attention": 128,
|
101 |
-
"local_rope_theta": 10000.0,
|
102 |
-
"max_position_embeddings": 8192,
|
103 |
-
"mlp_bias": false,
|
104 |
-
"mlp_dropout": 0.0,
|
105 |
-
"model_type": "modernbert",
|
106 |
-
"norm_bias": false,
|
107 |
-
"norm_eps": 1e-05,
|
108 |
-
"num_attention_heads": 12,
|
109 |
-
"num_hidden_layers": 22,
|
110 |
-
"pad_token_id": 50283,
|
111 |
-
"position_embedding_type": "absolute",
|
112 |
-
"repad_logits_with_grad": false,
|
113 |
-
"sep_token_id": 50282,
|
114 |
-
"sparse_pred_ignore_index": -100,
|
115 |
-
"sparse_prediction": false,
|
116 |
-
"torch_dtype": "float32",
|
117 |
-
"transformers_version": "4.55.2",
|
118 |
-
"vocab_size": 50368
|
119 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
checkpoint-330/model.safetensors
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:cb6ec1698bff6906a665266266f3f8b2e743b6619236b6e7c4fa74a76f0715ca
|
3 |
-
size 598541300
|
|
|
|
|
|
|
|
checkpoint-330/optimizer.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:175a3fdb326dd0f591e2398306ac7a215399ef4d9545e6aa4c3b44a8353298a8
|
3 |
-
size 1197172747
|
|
|
|
|
|
|
|
checkpoint-330/rng_state.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:ed3753ab7977739b8eda494dd72defae5750f7283141b11a8f562160ba4c1a23
|
3 |
-
size 14645
|
|
|
|
|
|
|
|
checkpoint-330/scheduler.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:91ca43ad54404cb852f61f3179fa0d50369b7bed2a3d846ad14d8537ea3c12a9
|
3 |
-
size 1465
|
|
|
|
|
|
|
|
checkpoint-330/trainer_state.json
DELETED
@@ -1,175 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"best_global_step": 330,
|
3 |
-
"best_metric": 0.9632840387557369,
|
4 |
-
"best_model_checkpoint": "pii_classifier_modernbert-base_presidio_token_model/checkpoint-330",
|
5 |
-
"epoch": 10.0,
|
6 |
-
"eval_steps": 500,
|
7 |
-
"global_step": 330,
|
8 |
-
"is_hyper_param_search": false,
|
9 |
-
"is_local_process_zero": true,
|
10 |
-
"is_world_process_zero": true,
|
11 |
-
"log_history": [
|
12 |
-
{
|
13 |
-
"epoch": 1.0,
|
14 |
-
"eval_accuracy": 0.5799762026177121,
|
15 |
-
"eval_f1": 0.4695039711201284,
|
16 |
-
"eval_loss": 1.893957495689392,
|
17 |
-
"eval_precision": 0.4935126787661159,
|
18 |
-
"eval_recall": 0.5799762026177121,
|
19 |
-
"eval_runtime": 23.603,
|
20 |
-
"eval_samples_per_second": 9.533,
|
21 |
-
"eval_steps_per_second": 0.636,
|
22 |
-
"step": 33
|
23 |
-
},
|
24 |
-
{
|
25 |
-
"epoch": 2.0,
|
26 |
-
"eval_accuracy": 0.7533571307156213,
|
27 |
-
"eval_f1": 0.7111688388751078,
|
28 |
-
"eval_loss": 0.9364706873893738,
|
29 |
-
"eval_precision": 0.7083445547100621,
|
30 |
-
"eval_recall": 0.7533571307156213,
|
31 |
-
"eval_runtime": 23.6426,
|
32 |
-
"eval_samples_per_second": 9.517,
|
33 |
-
"eval_steps_per_second": 0.634,
|
34 |
-
"step": 66
|
35 |
-
},
|
36 |
-
{
|
37 |
-
"epoch": 3.0,
|
38 |
-
"eval_accuracy": 0.8733639299677035,
|
39 |
-
"eval_f1": 0.8472559039016181,
|
40 |
-
"eval_loss": 0.4803955852985382,
|
41 |
-
"eval_precision": 0.8461851394388445,
|
42 |
-
"eval_recall": 0.8733639299677035,
|
43 |
-
"eval_runtime": 23.7467,
|
44 |
-
"eval_samples_per_second": 9.475,
|
45 |
-
"eval_steps_per_second": 0.632,
|
46 |
-
"step": 99
|
47 |
-
},
|
48 |
-
{
|
49 |
-
"epoch": 3.0303030303030303,
|
50 |
-
"grad_norm": 3.357513427734375,
|
51 |
-
"learning_rate": 8.83022221559489e-06,
|
52 |
-
"loss": 1.7939,
|
53 |
-
"step": 100
|
54 |
-
},
|
55 |
-
{
|
56 |
-
"epoch": 4.0,
|
57 |
-
"eval_accuracy": 0.9144994050654428,
|
58 |
-
"eval_f1": 0.9046430604528387,
|
59 |
-
"eval_loss": 0.30626583099365234,
|
60 |
-
"eval_precision": 0.9007848165265108,
|
61 |
-
"eval_recall": 0.9144994050654428,
|
62 |
-
"eval_runtime": 23.9053,
|
63 |
-
"eval_samples_per_second": 9.412,
|
64 |
-
"eval_steps_per_second": 0.627,
|
65 |
-
"step": 132
|
66 |
-
},
|
67 |
-
{
|
68 |
-
"epoch": 5.0,
|
69 |
-
"eval_accuracy": 0.9464558898521163,
|
70 |
-
"eval_f1": 0.941120594338562,
|
71 |
-
"eval_loss": 0.20138122141361237,
|
72 |
-
"eval_precision": 0.9377487874133417,
|
73 |
-
"eval_recall": 0.9464558898521163,
|
74 |
-
"eval_runtime": 23.6064,
|
75 |
-
"eval_samples_per_second": 9.531,
|
76 |
-
"eval_steps_per_second": 0.635,
|
77 |
-
"step": 165
|
78 |
-
},
|
79 |
-
{
|
80 |
-
"epoch": 6.0,
|
81 |
-
"eval_accuracy": 0.9552949175590685,
|
82 |
-
"eval_f1": 0.9524039016337489,
|
83 |
-
"eval_loss": 0.16908280551433563,
|
84 |
-
"eval_precision": 0.9532284447896412,
|
85 |
-
"eval_recall": 0.9552949175590685,
|
86 |
-
"eval_runtime": 23.516,
|
87 |
-
"eval_samples_per_second": 9.568,
|
88 |
-
"eval_steps_per_second": 0.638,
|
89 |
-
"step": 198
|
90 |
-
},
|
91 |
-
{
|
92 |
-
"epoch": 6.0606060606060606,
|
93 |
-
"grad_norm": 1.2425391674041748,
|
94 |
-
"learning_rate": 4.079723389713899e-06,
|
95 |
-
"loss": 0.1942,
|
96 |
-
"step": 200
|
97 |
-
},
|
98 |
-
{
|
99 |
-
"epoch": 7.0,
|
100 |
-
"eval_accuracy": 0.960224375318715,
|
101 |
-
"eval_f1": 0.9576892292455826,
|
102 |
-
"eval_loss": 0.1495475322008133,
|
103 |
-
"eval_precision": 0.9587472552812538,
|
104 |
-
"eval_recall": 0.960224375318715,
|
105 |
-
"eval_runtime": 23.9106,
|
106 |
-
"eval_samples_per_second": 9.41,
|
107 |
-
"eval_steps_per_second": 0.627,
|
108 |
-
"step": 231
|
109 |
-
},
|
110 |
-
{
|
111 |
-
"epoch": 8.0,
|
112 |
-
"eval_accuracy": 0.9631140574536801,
|
113 |
-
"eval_f1": 0.9613609203737836,
|
114 |
-
"eval_loss": 0.1422007530927658,
|
115 |
-
"eval_precision": 0.9626175146629873,
|
116 |
-
"eval_recall": 0.9631140574536801,
|
117 |
-
"eval_runtime": 23.6358,
|
118 |
-
"eval_samples_per_second": 9.519,
|
119 |
-
"eval_steps_per_second": 0.635,
|
120 |
-
"step": 264
|
121 |
-
},
|
122 |
-
{
|
123 |
-
"epoch": 9.0,
|
124 |
-
"eval_accuracy": 0.9629440761516234,
|
125 |
-
"eval_f1": 0.9614999857063931,
|
126 |
-
"eval_loss": 0.13786566257476807,
|
127 |
-
"eval_precision": 0.9629680400789847,
|
128 |
-
"eval_recall": 0.9629440761516234,
|
129 |
-
"eval_runtime": 23.9025,
|
130 |
-
"eval_samples_per_second": 9.413,
|
131 |
-
"eval_steps_per_second": 0.628,
|
132 |
-
"step": 297
|
133 |
-
},
|
134 |
-
{
|
135 |
-
"epoch": 9.090909090909092,
|
136 |
-
"grad_norm": 1.0872043371200562,
|
137 |
-
"learning_rate": 2.664129206497479e-07,
|
138 |
-
"loss": 0.0463,
|
139 |
-
"step": 300
|
140 |
-
},
|
141 |
-
{
|
142 |
-
"epoch": 10.0,
|
143 |
-
"eval_accuracy": 0.9632840387557369,
|
144 |
-
"eval_f1": 0.961914649184486,
|
145 |
-
"eval_loss": 0.13768431544303894,
|
146 |
-
"eval_precision": 0.9633718277416998,
|
147 |
-
"eval_recall": 0.9632840387557369,
|
148 |
-
"eval_runtime": 23.7603,
|
149 |
-
"eval_samples_per_second": 9.47,
|
150 |
-
"eval_steps_per_second": 0.631,
|
151 |
-
"step": 330
|
152 |
-
}
|
153 |
-
],
|
154 |
-
"logging_steps": 100,
|
155 |
-
"max_steps": 330,
|
156 |
-
"num_input_tokens_seen": 0,
|
157 |
-
"num_train_epochs": 10,
|
158 |
-
"save_steps": 500,
|
159 |
-
"stateful_callbacks": {
|
160 |
-
"TrainerControl": {
|
161 |
-
"args": {
|
162 |
-
"should_epoch_stop": false,
|
163 |
-
"should_evaluate": false,
|
164 |
-
"should_log": false,
|
165 |
-
"should_save": true,
|
166 |
-
"should_training_stop": true
|
167 |
-
},
|
168 |
-
"attributes": {}
|
169 |
-
}
|
170 |
-
},
|
171 |
-
"total_flos": 2.863020754944e+16,
|
172 |
-
"train_batch_size": 16,
|
173 |
-
"trial_name": null,
|
174 |
-
"trial_params": null
|
175 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
checkpoint-330/training_args.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:315e6b72e779eb2321d1291722c827013c190ff330ee5e8ee58c3e70fe5772a5
|
3 |
-
size 5777
|
|
|
|
|
|
|
|