lihongjie committed on
Commit
891634b
·
1 Parent(s): df33ba5

支持多图和任意分辨率

Browse files
Files changed (1) hide show
  1. qwen2_tokenizer_images.py +5 -4
qwen2_tokenizer_images.py CHANGED
@@ -79,14 +79,15 @@ class Tokenizer_Http():
79
 
80
  def encode(self, content):
81
  text = [f'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n']
82
-
83
  input_ids = self.tokenizer(text)
84
  return input_ids["input_ids"][0]
85
 
86
- def encode_vpm(self, content="Describe this image."):
87
 
88
  # official implementation
89
- text = f'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<|vision_start|>' + '<|image_pad|>' * 256 + f'<|vision_end|>{content}<|im_end|>\n<|im_start|>assistant\n'
 
 
90
 
91
  output_kwargs = {'text_kwargs': {'padding': True, 'return_tensors': 'pt'}, 'images_kwargs': {'return_tensors': 'pt'}, 'audio_kwargs': {'padding': True, 'return_tensors': 'pt'}, 'videos_kwargs': {'fps': 2.0, 'return_tensors': 'pt'}, 'common_kwargs': {'return_tensors': 'pt'}}
92
 
@@ -205,7 +206,7 @@ class Request(BaseHTTPRequestHandler):
205
  if 'img_prompt' in req:
206
  b_img_prompt = req['img_prompt']
207
  if b_img_prompt:
208
- token_ids = tokenizer.encode_vpm(prompt)
209
  else:
210
  token_ids = tokenizer.encode(prompt)
211
 
 
79
 
80
  def encode(self, content):
81
  text = [f'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{content}<|im_end|>\n<|im_start|>assistant\n']
 
82
  input_ids = self.tokenizer(text)
83
  return input_ids["input_ids"][0]
84
 
85
+ def encode_vpm(self, content="Describe this image.", num_img=1, img_token_num=256):
86
 
87
  # official implementation
88
+ imgs_token = '<|vision_start|>' + '<|image_pad|>'*img_token_num + '<|vision_end|>'
89
+ imgs_token *= num_img
90
+ text = f'<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{imgs_token}{content}<|im_end|>\n<|im_start|>assistant\n'
91
 
92
  output_kwargs = {'text_kwargs': {'padding': True, 'return_tensors': 'pt'}, 'images_kwargs': {'return_tensors': 'pt'}, 'audio_kwargs': {'padding': True, 'return_tensors': 'pt'}, 'videos_kwargs': {'fps': 2.0, 'return_tensors': 'pt'}, 'common_kwargs': {'return_tensors': 'pt'}}
93
 
 
206
  if 'img_prompt' in req:
207
  b_img_prompt = req['img_prompt']
208
  if b_img_prompt:
209
+ token_ids = tokenizer.encode_vpm(prompt, req["num_img"], req["img_token_num"])
210
  else:
211
  token_ids = tokenizer.encode(prompt)
212