yonigozlan HF Staff committed on
Commit
55919a2
·
verified ·
1 Parent(s): 0fb2222

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +3 -0
  2. tokenizer_config.json +8 -2
special_tokens_map.json CHANGED
@@ -23,6 +23,8 @@
23
  "rstrip": false,
24
  "single_word": false
25
  },
 
 
26
  "eos_token": {
27
  "content": "</s>",
28
  "lstrip": false,
@@ -37,6 +39,7 @@
37
  "rstrip": false,
38
  "single_word": false
39
  },
 
40
  "unk_token": {
41
  "content": "<unk>",
42
  "lstrip": false,
 
23
  "rstrip": false,
24
  "single_word": false
25
  },
26
+ "context_image_token": "<IMG_CONTEXT>",
27
+ "end_image_token": "</img>",
28
  "eos_token": {
29
  "content": "</s>",
30
  "lstrip": false,
 
39
  "rstrip": false,
40
  "single_word": false
41
  },
42
+ "start_image_token": "<img>",
43
  "unk_token": {
44
  "content": "<unk>",
45
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -1661,16 +1661,22 @@
1661
  },
1662
  "bos_token": "<s>",
1663
  "clean_up_tokenization_spaces": false,
 
 
1664
  "eos_token": "</s>",
1665
- "extra_special_tokens": {},
 
 
 
 
1666
  "legacy": false,
1667
  "model_max_length": 8192,
1668
  "pad_token": "</s>",
1669
  "prepend_scheme": "never",
1670
- "processor_class": "InternVLProcessor",
1671
  "return_token_type_ids": false,
1672
  "sp_model_kwargs": {},
1673
  "spaces_between_special_tokens": false,
 
1674
  "tokenizer_class": "LlamaTokenizer",
1675
  "unk_token": "<unk>",
1676
  "use_default_system_prompt": false
 
1661
  },
1662
  "bos_token": "<s>",
1663
  "clean_up_tokenization_spaces": false,
1664
+ "context_image_token": "<IMG_CONTEXT>",
1665
+ "end_image_token": "</img>",
1666
  "eos_token": "</s>",
1667
+ "extra_special_tokens": {
1668
+ "context_image_token": "<IMG_CONTEXT>",
1669
+ "end_image_token": "</img>",
1670
+ "start_image_token": "<img>"
1671
+ },
1672
  "legacy": false,
1673
  "model_max_length": 8192,
1674
  "pad_token": "</s>",
1675
  "prepend_scheme": "never",
 
1676
  "return_token_type_ids": false,
1677
  "sp_model_kwargs": {},
1678
  "spaces_between_special_tokens": false,
1679
+ "start_image_token": "<img>",
1680
  "tokenizer_class": "LlamaTokenizer",
1681
  "unk_token": "<unk>",
1682
  "use_default_system_prompt": false