Image features and image tokens do not match
#4
by
arpitjjw
- opened
I am trying to fine-tune InternVL3 using transformers, and I am getting this error:
Traceback (most recent call last):
File "/workspace/finetune_vlm.py", line 211, in <module>
main(p.parse_args())
File "/workspace/finetune_vlm.py", line 200, in main
trainer.train()
File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2238, in train
return inner_training_loop(
File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 2553, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 3728, in training_step
loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
File "/usr/local/lib/python3.10/dist-packages/transformers/trainer.py", line 3793, in compute_loss
outputs = model(**inputs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py", line 814, in forward
return model_forward(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/accelerate/utils/operations.py", line 802, in __call__
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/usr/local/lib/python3.10/dist-packages/torch/amp/autocast_mode.py", line 16, in decorate_autocast
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/internvl/modeling_internvl.py", line 967, in forward
raise ValueError(
ValueError: Image features and image tokens do not match: tokens: 3584, features 512
0%| | 0/190 [00:04<?, ?it/s]