yuyuzhang committed on
Commit
cb9d19e
·
verified ·
1 Parent(s): c7dbb38

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +13 -13
tokenizer_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "added_tokens_decoder": {
3
  "0": {
4
- "content": "<[BOS_never_used_51bce0c785ca2f68081bfa7d91973934]>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
@@ -9,7 +9,7 @@
9
  "special": true
10
  },
11
  "1": {
12
- "content": "<[PAD_never_used_51bce0c785ca2f68081bfa7d91973934]>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
@@ -17,7 +17,7 @@
17
  "special": true
18
  },
19
  "2": {
20
- "content": "<[EOS_never_used_51bce0c785ca2f68081bfa7d91973934]>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
@@ -49,7 +49,7 @@
49
  "special": true
50
  },
51
  "6": {
52
- "content": "<[SEP_never_used_51bce0c785ca2f68081bfa7d91973934]>",
53
  "lstrip": false,
54
  "normalized": false,
55
  "rstrip": false,
@@ -993,7 +993,7 @@
993
  "special": true
994
  },
995
  "124": {
996
- "content": "<[PLHD124_never_used_51bce0c785ca2f68081bfa7d91973934]>",
997
  "lstrip": false,
998
  "normalized": false,
999
  "rstrip": false,
@@ -1001,7 +1001,7 @@
1001
  "special": true
1002
  },
1003
  "125": {
1004
- "content": "<[PLHD125_never_used_51bce0c785ca2f68081bfa7d91973934]>",
1005
  "lstrip": false,
1006
  "normalized": false,
1007
  "rstrip": false,
@@ -1009,7 +1009,7 @@
1009
  "special": true
1010
  },
1011
  "126": {
1012
- "content": "<[PLHD126_never_used_51bce0c785ca2f68081bfa7d91973934]>",
1013
  "lstrip": false,
1014
  "normalized": false,
1015
  "rstrip": false,
@@ -1025,13 +1025,13 @@
1025
  "special": true
1026
  }
1027
  },
1028
- "bos_token": "<[BOS_never_used_51bce0c785ca2f68081bfa7d91973934]>",
1029
  "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ bos_token + role + '\n' + message['content'] | trim + eos_token }}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\n'}}{% endif %}",
1030
  "clean_up_tokenization_spaces": false,
1031
- "eos_token": "<[EOS_never_used_51bce0c785ca2f68081bfa7d91973934]>",
1032
  "extra_special_tokens": {},
1033
  "model_max_length": 65536,
1034
- "pad_token": "<[PAD_never_used_51bce0c785ca2f68081bfa7d91973934]>",
1035
- "sep_token": "<[SEP_never_used_51bce0c785ca2f68081bfa7d91973934]>",
1036
- "tokenizer_class": "PreTrainedTokenizer"
1037
- }
 
1
  {
2
  "added_tokens_decoder": {
3
  "0": {
4
+ "content": "<[begin▁of▁sentence]>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
 
9
  "special": true
10
  },
11
  "1": {
12
+ "content": "<[PAD▁TOKEN]>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
 
17
  "special": true
18
  },
19
  "2": {
20
+ "content": "<[end▁of▁sentence]>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
 
49
  "special": true
50
  },
51
  "6": {
52
+ "content": "<[SEP▁TOKEN]>",
53
  "lstrip": false,
54
  "normalized": false,
55
  "rstrip": false,
 
993
  "special": true
994
  },
995
  "124": {
996
+ "content": "<[fim-prefix]>",
997
  "lstrip": false,
998
  "normalized": false,
999
  "rstrip": false,
 
1001
  "special": true
1002
  },
1003
  "125": {
1004
+ "content": "<[fim-suffix]>",
1005
  "lstrip": false,
1006
  "normalized": false,
1007
  "rstrip": false,
 
1009
  "special": true
1010
  },
1011
  "126": {
1012
+ "content": "<[fim-middle]>",
1013
  "lstrip": false,
1014
  "normalized": false,
1015
  "rstrip": false,
 
1025
  "special": true
1026
  }
1027
  },
1028
+ "bos_token": "<[begin▁of▁sentence]>",
1029
  "chat_template": "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set role = message['role'] %}{{ bos_token + role + '\n' + message['content'] | trim + eos_token }}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\n'}}{% endif %}",
1030
  "clean_up_tokenization_spaces": false,
1031
+ "eos_token": "<[end▁of▁sentence]>",
1032
  "extra_special_tokens": {},
1033
  "model_max_length": 65536,
1034
+ "pad_token": "<[PAD▁TOKEN]>",
1035
+ "sep_token": "<[SEP▁TOKEN]>",
1036
+ "tokenizer_class": "PreTrainedTokenizerFast"
1037
+ }