Alignment-Lab-AI committed on
Commit
acf5267
·
verified ·
1 Parent(s): 3729ebb

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +4 -46
tokenizer_config.json CHANGED
@@ -2047,58 +2047,16 @@
2047
  "rstrip": false,
2048
  "single_word": false,
2049
  "special": true
2050
- },
2051
- "128256": {
2052
- "content": "<s>",
2053
- "lstrip": false,
2054
- "normalized": false,
2055
- "rstrip": false,
2056
- "single_word": false,
2057
- "special": true
2058
- },
2059
- "128257": {
2060
- "content": "</s>",
2061
- "lstrip": false,
2062
- "normalized": false,
2063
- "rstrip": false,
2064
- "single_word": false,
2065
- "special": true
2066
- },
2067
- "128258": {
2068
- "content": "<unk>",
2069
- "lstrip": false,
2070
- "normalized": false,
2071
- "rstrip": false,
2072
- "single_word": false,
2073
- "special": true
2074
- },
2075
- "128259": {
2076
- "content": "<|im_start|>",
2077
- "lstrip": false,
2078
- "normalized": false,
2079
- "rstrip": false,
2080
- "single_word": false,
2081
- "special": false
2082
- },
2083
- "128260": {
2084
- "content": "<|im_end|>",
2085
- "lstrip": false,
2086
- "normalized": false,
2087
- "rstrip": false,
2088
- "single_word": false,
2089
- "special": false
2090
  }
2091
  },
2092
- "bos_token": "<s>",
2093
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
2094
  "clean_up_tokenization_spaces": true,
2095
- "eos_token": "</s>",
2096
  "model_input_names": [
2097
  "input_ids",
2098
  "attention_mask"
2099
  ],
2100
  "model_max_length": 1000000000000000019884624838656,
2101
- "pad_token": "<|end_of_text|>",
2102
- "tokenizer_class": "PreTrainedTokenizerFast",
2103
- "unk_token": "<unk>"
2104
  }
 
2047
  "rstrip": false,
2048
  "single_word": false,
2049
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2050
  }
2051
  },
2052
+ "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% else %}{{ eos_token }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|end_of_text|>",
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 1000000000000000019884624838656,
2061
+ "tokenizer_class": "PreTrainedTokenizerFast"
 
 
2062
  }