wangchen615 committed on
Commit
dc008c8
·
verified ·
1 Parent(s): 7a68741

Delete checkpoint-330

Browse files
checkpoint-330/config.json DELETED
@@ -1,119 +0,0 @@
1
- {
2
- "architectures": [
3
- "ModernBertForTokenClassification"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "bos_token_id": 50281,
8
- "classifier_activation": "gelu",
9
- "classifier_bias": false,
10
- "classifier_dropout": 0.0,
11
- "classifier_pooling": "mean",
12
- "cls_token_id": 50281,
13
- "decoder_bias": true,
14
- "deterministic_flash_attn": false,
15
- "embedding_dropout": 0.0,
16
- "eos_token_id": 50282,
17
- "global_attn_every_n_layers": 3,
18
- "global_rope_theta": 160000.0,
19
- "gradient_checkpointing": false,
20
- "hidden_activation": "gelu",
21
- "hidden_size": 768,
22
- "id2label": {
23
- "0": "O",
24
- "1": "B-AGE",
25
- "2": "I-AGE",
26
- "3": "B-CREDIT_CARD",
27
- "4": "I-CREDIT_CARD",
28
- "5": "B-DATE_TIME",
29
- "6": "I-DATE_TIME",
30
- "7": "B-DOMAIN_NAME",
31
- "8": "I-DOMAIN_NAME",
32
- "9": "B-EMAIL_ADDRESS",
33
- "10": "I-EMAIL_ADDRESS",
34
- "11": "B-GPE",
35
- "12": "I-GPE",
36
- "13": "B-IBAN_CODE",
37
- "14": "I-IBAN_CODE",
38
- "15": "B-IP_ADDRESS",
39
- "16": "I-IP_ADDRESS",
40
- "17": "B-NRP",
41
- "18": "I-NRP",
42
- "19": "B-ORGANIZATION",
43
- "20": "I-ORGANIZATION",
44
- "21": "B-PERSON",
45
- "22": "I-PERSON",
46
- "23": "B-PHONE_NUMBER",
47
- "24": "I-PHONE_NUMBER",
48
- "25": "B-STREET_ADDRESS",
49
- "26": "I-STREET_ADDRESS",
50
- "27": "B-TITLE",
51
- "28": "I-TITLE",
52
- "29": "B-US_DRIVER_LICENSE",
53
- "30": "I-US_DRIVER_LICENSE",
54
- "31": "B-US_SSN",
55
- "32": "I-US_SSN",
56
- "33": "B-ZIP_CODE",
57
- "34": "I-ZIP_CODE"
58
- },
59
- "initializer_cutoff_factor": 2.0,
60
- "initializer_range": 0.02,
61
- "intermediate_size": 1152,
62
- "label2id": {
63
- "B-AGE": 1,
64
- "B-CREDIT_CARD": 3,
65
- "B-DATE_TIME": 5,
66
- "B-DOMAIN_NAME": 7,
67
- "B-EMAIL_ADDRESS": 9,
68
- "B-GPE": 11,
69
- "B-IBAN_CODE": 13,
70
- "B-IP_ADDRESS": 15,
71
- "B-NRP": 17,
72
- "B-ORGANIZATION": 19,
73
- "B-PERSON": 21,
74
- "B-PHONE_NUMBER": 23,
75
- "B-STREET_ADDRESS": 25,
76
- "B-TITLE": 27,
77
- "B-US_DRIVER_LICENSE": 29,
78
- "B-US_SSN": 31,
79
- "B-ZIP_CODE": 33,
80
- "I-AGE": 2,
81
- "I-CREDIT_CARD": 4,
82
- "I-DATE_TIME": 6,
83
- "I-DOMAIN_NAME": 8,
84
- "I-EMAIL_ADDRESS": 10,
85
- "I-GPE": 12,
86
- "I-IBAN_CODE": 14,
87
- "I-IP_ADDRESS": 16,
88
- "I-NRP": 18,
89
- "I-ORGANIZATION": 20,
90
- "I-PERSON": 22,
91
- "I-PHONE_NUMBER": 24,
92
- "I-STREET_ADDRESS": 26,
93
- "I-TITLE": 28,
94
- "I-US_DRIVER_LICENSE": 30,
95
- "I-US_SSN": 32,
96
- "I-ZIP_CODE": 34,
97
- "O": 0
98
- },
99
- "layer_norm_eps": 1e-05,
100
- "local_attention": 128,
101
- "local_rope_theta": 10000.0,
102
- "max_position_embeddings": 8192,
103
- "mlp_bias": false,
104
- "mlp_dropout": 0.0,
105
- "model_type": "modernbert",
106
- "norm_bias": false,
107
- "norm_eps": 1e-05,
108
- "num_attention_heads": 12,
109
- "num_hidden_layers": 22,
110
- "pad_token_id": 50283,
111
- "position_embedding_type": "absolute",
112
- "repad_logits_with_grad": false,
113
- "sep_token_id": 50282,
114
- "sparse_pred_ignore_index": -100,
115
- "sparse_prediction": false,
116
- "torch_dtype": "float32",
117
- "transformers_version": "4.55.2",
118
- "vocab_size": 50368
119
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-330/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb6ec1698bff6906a665266266f3f8b2e743b6619236b6e7c4fa74a76f0715ca
3
- size 598541300
 
 
 
 
checkpoint-330/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:175a3fdb326dd0f591e2398306ac7a215399ef4d9545e6aa4c3b44a8353298a8
3
- size 1197172747
 
 
 
 
checkpoint-330/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed3753ab7977739b8eda494dd72defae5750f7283141b11a8f562160ba4c1a23
3
- size 14645
 
 
 
 
checkpoint-330/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:91ca43ad54404cb852f61f3179fa0d50369b7bed2a3d846ad14d8537ea3c12a9
3
- size 1465
 
 
 
 
checkpoint-330/trainer_state.json DELETED
@@ -1,175 +0,0 @@
1
- {
2
- "best_global_step": 330,
3
- "best_metric": 0.9632840387557369,
4
- "best_model_checkpoint": "pii_classifier_modernbert-base_presidio_token_model/checkpoint-330",
5
- "epoch": 10.0,
6
- "eval_steps": 500,
7
- "global_step": 330,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 1.0,
14
- "eval_accuracy": 0.5799762026177121,
15
- "eval_f1": 0.4695039711201284,
16
- "eval_loss": 1.893957495689392,
17
- "eval_precision": 0.4935126787661159,
18
- "eval_recall": 0.5799762026177121,
19
- "eval_runtime": 23.603,
20
- "eval_samples_per_second": 9.533,
21
- "eval_steps_per_second": 0.636,
22
- "step": 33
23
- },
24
- {
25
- "epoch": 2.0,
26
- "eval_accuracy": 0.7533571307156213,
27
- "eval_f1": 0.7111688388751078,
28
- "eval_loss": 0.9364706873893738,
29
- "eval_precision": 0.7083445547100621,
30
- "eval_recall": 0.7533571307156213,
31
- "eval_runtime": 23.6426,
32
- "eval_samples_per_second": 9.517,
33
- "eval_steps_per_second": 0.634,
34
- "step": 66
35
- },
36
- {
37
- "epoch": 3.0,
38
- "eval_accuracy": 0.8733639299677035,
39
- "eval_f1": 0.8472559039016181,
40
- "eval_loss": 0.4803955852985382,
41
- "eval_precision": 0.8461851394388445,
42
- "eval_recall": 0.8733639299677035,
43
- "eval_runtime": 23.7467,
44
- "eval_samples_per_second": 9.475,
45
- "eval_steps_per_second": 0.632,
46
- "step": 99
47
- },
48
- {
49
- "epoch": 3.0303030303030303,
50
- "grad_norm": 3.357513427734375,
51
- "learning_rate": 8.83022221559489e-06,
52
- "loss": 1.7939,
53
- "step": 100
54
- },
55
- {
56
- "epoch": 4.0,
57
- "eval_accuracy": 0.9144994050654428,
58
- "eval_f1": 0.9046430604528387,
59
- "eval_loss": 0.30626583099365234,
60
- "eval_precision": 0.9007848165265108,
61
- "eval_recall": 0.9144994050654428,
62
- "eval_runtime": 23.9053,
63
- "eval_samples_per_second": 9.412,
64
- "eval_steps_per_second": 0.627,
65
- "step": 132
66
- },
67
- {
68
- "epoch": 5.0,
69
- "eval_accuracy": 0.9464558898521163,
70
- "eval_f1": 0.941120594338562,
71
- "eval_loss": 0.20138122141361237,
72
- "eval_precision": 0.9377487874133417,
73
- "eval_recall": 0.9464558898521163,
74
- "eval_runtime": 23.6064,
75
- "eval_samples_per_second": 9.531,
76
- "eval_steps_per_second": 0.635,
77
- "step": 165
78
- },
79
- {
80
- "epoch": 6.0,
81
- "eval_accuracy": 0.9552949175590685,
82
- "eval_f1": 0.9524039016337489,
83
- "eval_loss": 0.16908280551433563,
84
- "eval_precision": 0.9532284447896412,
85
- "eval_recall": 0.9552949175590685,
86
- "eval_runtime": 23.516,
87
- "eval_samples_per_second": 9.568,
88
- "eval_steps_per_second": 0.638,
89
- "step": 198
90
- },
91
- {
92
- "epoch": 6.0606060606060606,
93
- "grad_norm": 1.2425391674041748,
94
- "learning_rate": 4.079723389713899e-06,
95
- "loss": 0.1942,
96
- "step": 200
97
- },
98
- {
99
- "epoch": 7.0,
100
- "eval_accuracy": 0.960224375318715,
101
- "eval_f1": 0.9576892292455826,
102
- "eval_loss": 0.1495475322008133,
103
- "eval_precision": 0.9587472552812538,
104
- "eval_recall": 0.960224375318715,
105
- "eval_runtime": 23.9106,
106
- "eval_samples_per_second": 9.41,
107
- "eval_steps_per_second": 0.627,
108
- "step": 231
109
- },
110
- {
111
- "epoch": 8.0,
112
- "eval_accuracy": 0.9631140574536801,
113
- "eval_f1": 0.9613609203737836,
114
- "eval_loss": 0.1422007530927658,
115
- "eval_precision": 0.9626175146629873,
116
- "eval_recall": 0.9631140574536801,
117
- "eval_runtime": 23.6358,
118
- "eval_samples_per_second": 9.519,
119
- "eval_steps_per_second": 0.635,
120
- "step": 264
121
- },
122
- {
123
- "epoch": 9.0,
124
- "eval_accuracy": 0.9629440761516234,
125
- "eval_f1": 0.9614999857063931,
126
- "eval_loss": 0.13786566257476807,
127
- "eval_precision": 0.9629680400789847,
128
- "eval_recall": 0.9629440761516234,
129
- "eval_runtime": 23.9025,
130
- "eval_samples_per_second": 9.413,
131
- "eval_steps_per_second": 0.628,
132
- "step": 297
133
- },
134
- {
135
- "epoch": 9.090909090909092,
136
- "grad_norm": 1.0872043371200562,
137
- "learning_rate": 2.664129206497479e-07,
138
- "loss": 0.0463,
139
- "step": 300
140
- },
141
- {
142
- "epoch": 10.0,
143
- "eval_accuracy": 0.9632840387557369,
144
- "eval_f1": 0.961914649184486,
145
- "eval_loss": 0.13768431544303894,
146
- "eval_precision": 0.9633718277416998,
147
- "eval_recall": 0.9632840387557369,
148
- "eval_runtime": 23.7603,
149
- "eval_samples_per_second": 9.47,
150
- "eval_steps_per_second": 0.631,
151
- "step": 330
152
- }
153
- ],
154
- "logging_steps": 100,
155
- "max_steps": 330,
156
- "num_input_tokens_seen": 0,
157
- "num_train_epochs": 10,
158
- "save_steps": 500,
159
- "stateful_callbacks": {
160
- "TrainerControl": {
161
- "args": {
162
- "should_epoch_stop": false,
163
- "should_evaluate": false,
164
- "should_log": false,
165
- "should_save": true,
166
- "should_training_stop": true
167
- },
168
- "attributes": {}
169
- }
170
- },
171
- "total_flos": 2.863020754944e+16,
172
- "train_batch_size": 16,
173
- "trial_name": null,
174
- "trial_params": null
175
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-330/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:315e6b72e779eb2321d1291722c827013c190ff330ee5e8ee58c3e70fe5772a5
3
- size 5777