AI-Sweden-Models/ModernBERT-large

Model card · Files and versions · Community

timpal0l committed on Mar 26

Commit

47f4b60

·

verified ·

1 Parent(s): a9a4365

Upload tokenizer

Files changed (2) hide show

tokenizer.json +11 -1
tokenizer_config.json +2 -1

tokenizer.json CHANGED Viewed

@@ -58,7 +58,17 @@
     "trim_offsets": true,
     "use_regex": true
   },
-  "post_processor": null,
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,

     "trim_offsets": true,
     "use_regex": true
   },
+  "post_processor": {
+    "type": "BertProcessing",
+    "sep": [
+      "[SEP]",
+      2
+    ],
+    "cls": [
+      "[CLS]",
+      1
+    ]
+  },
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,

tokenizer_config.json CHANGED Viewed

@@ -43,10 +43,11 @@
   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
   "mask_token": "[MASK]",
   "model_max_length": 8192,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
-  "tokenizer_class": "PreTrainedTokenizerFast",
   "unk_token": "[UNK]"
 }

   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
+  "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "model_max_length": 8192,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
+  "tokenizer_class": "PreTrainedTokenizer",
   "unk_token": "[UNK]"
 }