Error while loading processor: TypeError: expected str, bytes or os.PathLike object, not NoneType
#2 opened by armamut
I'm getting an error while loading the processor.
```python
from transformers import SiglipProcessor, SiglipModel

device = "cuda"  # the device to load the model onto
ckpt = "google/siglip2-base-patch16-224"
processor = SiglipProcessor.from_pretrained(ckpt, trust_remote_code=True)
```
```
The tokenizer class you load from this checkpoint is 'GemmaTokenizer'.
The class this function is called from is 'SiglipTokenizer'.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[1], line 5
2 device = "cuda" # the device to load the model onto
4 ckpt = "google/siglip2-base-patch16-224"
----> 5 processor = SiglipProcessor.from_pretrained(ckpt, trust_remote_code=True)
File /opt/conda/lib/python3.10/site-packages/transformers/processing_utils.py:1070, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
1067 if token is not None:
1068 kwargs["token"] = token
-> 1070 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
1071 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
1073 return cls.from_args_and_dict(args, processor_dict, **kwargs)
File /opt/conda/lib/python3.10/site-packages/transformers/processing_utils.py:1116, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
1113 else:
1114 attribute_class = getattr(transformers_module, class_name)
-> 1116 args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
1117 return args
File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2052, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)
2049 else:
2050 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2052 return cls._from_pretrained(
2053 resolved_vocab_files,
2054 pretrained_model_name_or_path,
2055 init_configuration,
2056 *init_inputs,
2057 token=token,
2058 cache_dir=cache_dir,
2059 local_files_only=local_files_only,
2060 _commit_hash=commit_hash,
2061 _is_local=is_local,
2062 trust_remote_code=trust_remote_code,
2063 **kwargs,
2064 )
File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2292, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, trust_remote_code, *init_inputs, **kwargs)
2290 # Instantiate the tokenizer.
2291 try:
-> 2292 tokenizer = cls(*init_inputs, **init_kwargs)
2293 except import_protobuf_decode_error():
2294 logger.info(
2295 "Unable to load tokenizer model from SPM, loading from TikToken will be attempted instead."
2296 "(Google protobuf error: Tried to load SPM model with non-SPM vocab file).",
2297 )
File /opt/conda/lib/python3.10/site-packages/transformers/models/siglip/tokenization_siglip.py:123, in SiglipTokenizer.__init__(self, vocab_file, eos_token, unk_token, pad_token, additional_special_tokens, sp_model_kwargs, model_max_length, do_lower_case, **kwargs)
120 self.do_lower_case = do_lower_case
121 self.vocab_file = vocab_file
--> 123 self.sp_model = self.get_spm_processor()
124 self.vocab_file = vocab_file
126 super().__init__(
127 eos_token=eos_token,
128 unk_token=unk_token,
(...)
134 **kwargs,
135 )
File /opt/conda/lib/python3.10/site-packages/transformers/models/siglip/tokenization_siglip.py:139, in SiglipTokenizer.get_spm_processor(self)
137 def get_spm_processor(self):
138 tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs)
--> 139 with open(self.vocab_file, "rb") as f:
140 sp_model = f.read()
141 model_pb2 = import_protobuf()
TypeError: expected str, bytes or os.PathLike object, not NoneType
```
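A minimal sketch of what the warning and traceback above suggest is happening: the checkpoint ships a Gemma tokenizer, but `SiglipProcessor` instantiates `SiglipTokenizer`, which expects a SentencePiece vocab file (`spiece.model`) that this repo does not provide, so `vocab_file` is `None` and `open(None, "rb")` raises the `TypeError`. The snippet below only illustrates that assumption; it assumes `AutoTokenizer` resolves to the Gemma tokenizer named in the warning.

```python
# Sketch only: load the tokenizer via AutoTokenizer, which follows the
# checkpoint's own tokenizer_class instead of forcing SiglipTokenizer.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("google/siglip2-base-patch16-224")
print(type(tok).__name__)  # expected to be a Gemma tokenizer class, per the warning above
```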
armamut changed discussion title from "Error while loading model TypeError: expected str, bytes or os.PathLike object, not NoneType" to "Error while loading processor: TypeError: expected str, bytes or os.PathLike object, not NoneType"
I have the same issue using the AutoModel and AutoProcessor classes (both from the transformers 4.49.0 package).
You need to install the latest version of transformers; for now, install from source: `pip install git+https://github.com/huggingface/transformers`
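For reference, a minimal loading sketch once a transformers build with SigLIP2 support is installed (e.g. from source as suggested above). It assumes the Auto classes resolve this checkpoint to the SigLIP2 processor/model and that a CUDA device is available, as in the original snippet:

```python
# Sketch, assuming a transformers version that includes SigLIP2 support.
from transformers import AutoModel, AutoProcessor

ckpt = "google/siglip2-base-patch16-224"
processor = AutoProcessor.from_pretrained(ckpt)     # should resolve to the SigLIP2 processor
model = AutoModel.from_pretrained(ckpt).to("cuda")  # move to GPU, as in the original snippet
```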