RaushanTurganbay HF Staff committed on
Commit
4c9178e
·
1 Parent(s): 97f1e98
added_tokens.json CHANGED
@@ -10,6 +10,7 @@
10
  "<quad>": 151668,
11
  "<ref>": 151670,
12
  "<tool_call>": 151657,
 
13
  "<|box_end|>": 151649,
14
  "<|box_start|>": 151648,
15
  "<|endoftext|>": 151643,
 
10
  "<quad>": 151668,
11
  "<ref>": 151670,
12
  "<tool_call>": 151657,
13
+ "<video>": 151674,
14
  "<|box_end|>": 151649,
15
  "<|box_start|>": 151648,
16
  "<|endoftext|>": 151643,
chat_template.jinja CHANGED
@@ -1,5 +1,5 @@
1
  {% for message in messages %}{{'<|im_start|>' + message['role'] + '
2
- '}}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<image>
3
  ' }}{% elif content['type'] == 'video' %}{{ '<video>
4
  ' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{'<|im_end|>
5
  '}}{% endfor %}{% if add_generation_prompt %}{{'<|im_start|>assistant
 
1
  {% for message in messages %}{{'<|im_start|>' + message['role'] + '
2
+ '}}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<IMG_CONTEXT>
3
  ' }}{% elif content['type'] == 'video' %}{{ '<video>
4
  ' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{'<|im_end|>
5
  '}}{% endfor %}{% if add_generation_prompt %}{{'<|im_start|>assistant
processor_config.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
- "fake_image_token": "<image>",
3
- "fake_video_token": "<video>",
4
  "image_seq_length": 256,
5
  "processor_class": "InternVLProcessor"
6
  }
 
1
  {
 
 
2
  "image_seq_length": 256,
3
  "processor_class": "InternVLProcessor"
4
  }
special_tokens_map.json CHANGED
@@ -13,69 +13,15 @@
13
  "<|vision_pad|>",
14
  "<|image_pad|>",
15
  "<|video_pad|>",
16
- {
17
- "content": "<img>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- {
24
- "content": "</img>",
25
- "lstrip": false,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- {
31
- "content": "<IMG_CONTEXT>",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- {
38
- "content": "<quad>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- {
45
- "content": "</quad>",
46
- "lstrip": false,
47
- "normalized": false,
48
- "rstrip": false,
49
- "single_word": false
50
- },
51
- {
52
- "content": "<ref>",
53
- "lstrip": false,
54
- "normalized": false,
55
- "rstrip": false,
56
- "single_word": false
57
- },
58
- {
59
- "content": "</ref>",
60
- "lstrip": false,
61
- "normalized": false,
62
- "rstrip": false,
63
- "single_word": false
64
- },
65
- {
66
- "content": "<box>",
67
- "lstrip": false,
68
- "normalized": false,
69
- "rstrip": false,
70
- "single_word": false
71
- },
72
- {
73
- "content": "</box>",
74
- "lstrip": false,
75
- "normalized": false,
76
- "rstrip": false,
77
- "single_word": false
78
- }
79
  ],
80
  "context_image_token": "<IMG_CONTEXT>",
81
  "end_image_token": "</img>",
@@ -93,5 +39,6 @@
93
  "rstrip": false,
94
  "single_word": false
95
  },
96
- "start_image_token": "<img>"
 
97
  }
 
13
  "<|vision_pad|>",
14
  "<|image_pad|>",
15
  "<|video_pad|>",
16
+ "<img>",
17
+ "</img>",
18
+ "<IMG_CONTEXT>",
19
+ "<quad>",
20
+ "</quad>",
21
+ "<ref>",
22
+ "</ref>",
23
+ "<box>",
24
+ "</box>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  ],
26
  "context_image_token": "<IMG_CONTEXT>",
27
  "end_image_token": "</img>",
 
39
  "rstrip": false,
40
  "single_word": false
41
  },
42
+ "start_image_token": "<img>",
43
+ "video_token": "<video>"
44
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f9ba4b4a6625b5047a1356f6081b641c3e4e6a4a198facbd4bef217747d1685
3
- size 11423548
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cc80b7e20adf8bf6f6ca442bf1abfac8056bb3b7d3e0b11c9d497d3e79398c9
3
+ size 11423732
tokenizer_config.json CHANGED
@@ -249,6 +249,14 @@
249
  "rstrip": false,
250
  "single_word": false,
251
  "special": true
 
 
 
 
 
 
 
 
252
  }
253
  },
254
  "additional_special_tokens": [
@@ -284,7 +292,8 @@
284
  "extra_special_tokens": {
285
  "context_image_token": "<IMG_CONTEXT>",
286
  "end_image_token": "</img>",
287
- "start_image_token": "<img>"
 
288
  },
289
  "model_max_length": 8192,
290
  "pad_token": "<|endoftext|>",
@@ -292,5 +301,6 @@
292
  "split_special_tokens": false,
293
  "start_image_token": "<img>",
294
  "tokenizer_class": "Qwen2Tokenizer",
295
- "unk_token": null
 
296
  }
 
249
  "rstrip": false,
250
  "single_word": false,
251
  "special": true
252
+ },
253
+ "151674": {
254
+ "content": "<video>",
255
+ "lstrip": false,
256
+ "normalized": false,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": true
260
  }
261
  },
262
  "additional_special_tokens": [
 
292
  "extra_special_tokens": {
293
  "context_image_token": "<IMG_CONTEXT>",
294
  "end_image_token": "</img>",
295
+ "start_image_token": "<img>",
296
+ "video_token": "<video>"
297
  },
298
  "model_max_length": 8192,
299
  "pad_token": "<|endoftext|>",
 
301
  "split_special_tokens": false,
302
  "start_image_token": "<img>",
303
  "tokenizer_class": "Qwen2Tokenizer",
304
+ "unk_token": null,
305
+ "video_token": "<video>"
306
  }