hezhihui committed
Commit: b352d20 · Parent: 9403e15
update chat msgs

Files changed: modeling_minicpmv.py (+7 -7)
modeling_minicpmv.py CHANGED

@@ -1,13 +1,10 @@
 import math
-from typing import List, Optional
 import json
 import torch
-import torchvision
 from threading import Thread
 from copy import deepcopy
-from PIL import Image
 from torchvision import transforms
-from transformers import
+from transformers import LlamaPreTrainedModel, LlamaForCausalLM, TextIteratorStreamer
 from transformers.models.idefics2.modeling_idefics2 import Idefics2VisionTransformer
 from transformers import AutoProcessor
 
@@ -91,7 +88,9 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
             img_cnt = []
             for pixel_values in pixel_values_list:
                 img_cnt.append(len(pixel_values))
-                all_pixel_values.extend([i.flatten(end_dim=1).permute(1, 0) for i in pixel_values])
+                all_pixel_values.extend([i.flatten(end_dim=1).permute(1, 0) for i in pixel_values])
+
+            # exist image
             if all_pixel_values:
                 tgt_sizes = torch.vstack(tgt_sizes).type(torch.int32)
 
@@ -290,17 +289,18 @@ class MiniCPMV(MiniCPMVPreTrainedModel):
         processor = AutoProcessor.from_pretrained(self.config._name_or_path, trust_remote_code=True)
         if isinstance(msgs, str):
             msgs = json.loads(msgs)
+        copy_msgs = deepcopy(msgs)
 
         assert len(msgs) > 0, 'msgs is empty'
         assert sampling or not stream, 'if use stream mode, make sure sampling=True'
 
         if image is not None and isinstance(msgs[0]['content'], str):
-
+            copy_msgs[0]['content'] = '(<image>./</image>)\n' + copy_msgs[0]['content']
         if system_prompt:
             sys_msg = {'role': 'system', 'content': system_prompt}
             copy_msgs = [sys_msg] + copy_msgs
 
-        prompt = processor.tokenizer.apply_chat_template(
+        prompt = processor.tokenizer.apply_chat_template(copy_msgs, tokenize=False, add_generation_prompt=True)
         inputs = processor(prompt, [image], return_tensors="pt", max_length=max_inp_length).to(self.device)
 
         if sampling:
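
The second hunk only touches how the per-image pixel_values are gathered before padding. As a rough standalone illustration of what that extend line does, the snippet below runs the same flatten(end_dim=1).permute(1, 0) call on dummy tensors; the (3, 14, 14*n) slice shapes are assumptions for demonstration, only the extend line itself comes from the diff.

import torch

# Hypothetical per-image slices; only the flatten/permute call below comes from the diff.
pixel_values = [torch.randn(3, 14, 14 * 5), torch.randn(3, 14, 14 * 3)]

all_pixel_values = []
all_pixel_values.extend([i.flatten(end_dim=1).permute(1, 0) for i in pixel_values])

# Each (C, H, W) slice becomes (W, C*H): (3, 14, 70) -> (42, 70) -> (70, 42).
for t in all_pixel_values:
    print(t.shape)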
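The third hunk is the actual "update chat msgs" change: chat() now deep-copies the incoming msgs, prepends the (<image>./</image>) placeholder to the copy instead of mutating the caller's list, and feeds the copy to apply_chat_template. Below is a minimal sketch of that flow outside the model class, assuming a MiniCPM-V style checkpoint whose AutoProcessor is loaded with trust_remote_code=True; the checkpoint name, image path, question and max_length value are illustrative, while the deepcopy, placeholder and template/processor calls mirror the diff.

from copy import deepcopy

from PIL import Image
from transformers import AutoProcessor

# Assumption: any MiniCPM-V checkpoint that ships this processor; the name is illustrative.
processor = AutoProcessor.from_pretrained("openbmb/MiniCPM-Llama3-V-2_5", trust_remote_code=True)

image = Image.open("example.jpg").convert("RGB")
msgs = [{"role": "user", "content": "What is in this image?"}]

# Work on a copy so the caller's msgs list is not mutated (the new deepcopy line).
copy_msgs = deepcopy(msgs)
if image is not None and isinstance(copy_msgs[0]["content"], str):
    # Same image placeholder the diff prepends to the first message.
    copy_msgs[0]["content"] = "(<image>./</image>)\n" + copy_msgs[0]["content"]

# Build the text prompt with the chat template, then process text and image together,
# mirroring the processor(prompt, [image], ...) call in the diff.
prompt = processor.tokenizer.apply_chat_template(copy_msgs, tokenize=False, add_generation_prompt=True)
inputs = processor(prompt, [image], return_tensors="pt", max_length=2048)  # 2048 stands in for max_inp_length

Because only copy_msgs is edited, the caller's msgs list can be reused for chat history across turns without accumulating duplicate image placeholders.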