Transformers
English
pszemraj committed on
Commit
dac1209
·
verified ·
1 Parent(s): ee8c40e

validate and set bos/eos/pad token_ids

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +2 -0
  2. tokenizer_config.json +2 -0
special_tokens_map.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "cls_token": {
3
  "content": "[CLS]",
4
  "lstrip": false,
@@ -6,6 +7,7 @@
6
  "rstrip": false,
7
  "single_word": false
8
  },
 
9
  "mask_token": {
10
  "content": "[MASK]",
11
  "lstrip": false,
 
1
  {
2
+ "bos_token": "[CLS]",
3
  "cls_token": {
4
  "content": "[CLS]",
5
  "lstrip": false,
 
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
+ "eos_token": "[SEP]",
11
  "mask_token": {
12
  "content": "[MASK]",
13
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -41,8 +41,10 @@
41
  "special": true
42
  }
43
  },
 
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
 
46
  "extra_special_tokens": {},
47
  "mask_token": "[MASK]",
48
  "model_max_length": 1000000000.0,
 
41
  "special": true
42
  }
43
  },
44
+ "bos_token": "[CLS]",
45
  "clean_up_tokenization_spaces": true,
46
  "cls_token": "[CLS]",
47
+ "eos_token": "[SEP]",
48
  "extra_special_tokens": {},
49
  "mask_token": "[MASK]",
50
  "model_max_length": 1000000000.0,