Transformers
timpal0l committed on
Commit 47f4b60 · verified
1 Parent(s): a9a4365

Upload tokenizer

Files changed (2)
  1. tokenizer.json +11 -1
  2. tokenizer_config.json +2 -1
tokenizer.json CHANGED
@@ -58,7 +58,17 @@
     "trim_offsets": true,
     "use_regex": true
   },
-  "post_processor": null,
+  "post_processor": {
+    "type": "BertProcessing",
+    "sep": [
+      "[SEP]",
+      2
+    ],
+    "cls": [
+      "[CLS]",
+      1
+    ]
+  },
   "decoder": {
     "type": "ByteLevel",
     "add_prefix_space": true,
tokenizer_config.json CHANGED
@@ -43,10 +43,11 @@
   },
   "clean_up_tokenization_spaces": true,
   "cls_token": "[CLS]",
+  "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "model_max_length": 8192,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
-  "tokenizer_class": "PreTrainedTokenizerFast",
+  "tokenizer_class": "PreTrainedTokenizer",
   "unk_token": "[UNK]"
 }
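
The diff above does not show how the repository is meant to be consumed, so the following is only a sketch: it loads the tokenizer via transformers and reads back the values declared in tokenizer_config.json. The repo id is a placeholder, not taken from this page, and the concrete class transformers resolves can differ from the declared tokenizer_class (for example a Fast variant when use_fast=True).

from transformers import AutoTokenizer

# Placeholder repo id; substitute the actual Hub repository this commit belongs to.
tok = AutoTokenizer.from_pretrained("your-org/your-model")

print(type(tok).__name__)    # class resolved from tokenizer_config.json (may be a Fast variant)
print(tok.model_max_length)  # 8192, per this config
print(tok.cls_token, tok.sep_token, tok.pad_token, tok.mask_token, tok.unk_token)
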