nicolauduran45 committed on
Commit
7c12d9c
·
verified ·
1 Parent(s): bc3f6ef

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +1 -2
tokenizer_config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "add_prefix_space": true,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<s>",
@@ -46,11 +45,11 @@
46
  "clean_up_tokenization_spaces": true,
47
  "cls_token": "<s>",
48
  "eos_token": "</s>",
 
49
  "mask_token": "<mask>",
50
  "model_max_length": 512,
51
  "pad_token": "<pad>",
52
  "sep_token": "</s>",
53
  "tokenizer_class": "XLMRobertaTokenizer",
54
- "trim_offsets": true,
55
  "unk_token": "<unk>"
56
  }
 
1
  {
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<s>",
 
45
  "clean_up_tokenization_spaces": true,
46
  "cls_token": "<s>",
47
  "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
  "model_max_length": 512,
51
  "pad_token": "<pad>",
52
  "sep_token": "</s>",
53
  "tokenizer_class": "XLMRobertaTokenizer",
 
54
  "unk_token": "<unk>"
55
  }