Zero-Shot Image Classification
Transformers
Safetensors
siglip
vision
Inference Endpoints

Error while loading processor: TypeError: expected str, bytes or os.PathLike object, not NoneType

#2
by armamut - opened

I'm getting an error while loading the processor.

from transformers import SiglipProcessor, SiglipModel
device = "cuda" # the device to load the model onto

ckpt = "google/siglip2-base-patch16-224"
processor = SiglipProcessor.from_pretrained(ckpt, trust_remote_code=True)
The tokenizer class you load from this checkpoint is 'GemmaTokenizer'. 
The class this function is called from is 'SiglipTokenizer'.
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[1], line 5
      2 device = "cuda" # the device to load the model onto
      4 ckpt = "google/siglip2-base-patch16-224"
----> 5 processor = SiglipProcessor.from_pretrained(ckpt, trust_remote_code=True)

File /opt/conda/lib/python3.10/site-packages/transformers/processing_utils.py:1070, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
   1067 if token is not None:
   1068     kwargs["token"] = token
-> 1070 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
   1071 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
   1073 return cls.from_args_and_dict(args, processor_dict, **kwargs)

File /opt/conda/lib/python3.10/site-packages/transformers/processing_utils.py:1116, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
   1113     else:
   1114         attribute_class = getattr(transformers_module, class_name)
-> 1116     args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
   1117 return args

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2052, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)
   2049     else:
   2050         logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2052 return cls._from_pretrained(
   2053     resolved_vocab_files,
   2054     pretrained_model_name_or_path,
   2055     init_configuration,
   2056     *init_inputs,
   2057     token=token,
   2058     cache_dir=cache_dir,
   2059     local_files_only=local_files_only,
   2060     _commit_hash=commit_hash,
   2061     _is_local=is_local,
   2062     trust_remote_code=trust_remote_code,
   2063     **kwargs,
   2064 )

File /opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2292, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, trust_remote_code, *init_inputs, **kwargs)
   2290 # Instantiate the tokenizer.
   2291 try:
-> 2292     tokenizer = cls(*init_inputs, **init_kwargs)
   2293 except import_protobuf_decode_error():
   2294     logger.info(
   2295         "Unable to load tokenizer model from SPM, loading from TikToken will be attempted instead."
   2296         "(Google protobuf error: Tried to load SPM model with non-SPM vocab file).",
   2297     )

File /opt/conda/lib/python3.10/site-packages/transformers/models/siglip/tokenization_siglip.py:123, in SiglipTokenizer.__init__(self, vocab_file, eos_token, unk_token, pad_token, additional_special_tokens, sp_model_kwargs, model_max_length, do_lower_case, **kwargs)
    120 self.do_lower_case = do_lower_case
    121 self.vocab_file = vocab_file
--> 123 self.sp_model = self.get_spm_processor()
    124 self.vocab_file = vocab_file
    126 super().__init__(
    127     eos_token=eos_token,
    128     unk_token=unk_token,
   (...)
    134     **kwargs,
    135 )

File /opt/conda/lib/python3.10/site-packages/transformers/models/siglip/tokenization_siglip.py:139, in SiglipTokenizer.get_spm_processor(self)
    137 def get_spm_processor(self):
    138     tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs)
--> 139     with open(self.vocab_file, "rb") as f:
    140         sp_model = f.read()
    141         model_pb2 = import_protobuf()

TypeError: expected str, bytes or os.PathLike object, not NoneType
armamut changed discussion title from Error while loading model TypeError: expected str, bytes or os.PathLike object, not NoneType to Error while loading processor: TypeError: expected str, bytes or os.PathLike object, not NoneType

I have the same issue when using the AutoModel and AutoProcessor classes (both from the transformers 4.49.0 package).

You need to install the latest version of transformers. For now, run: pip install git+https://github.com/huggingface/transformers

Sign up or log in to comment