feat: make encode_texts and encode_images support single inputs
modeling_jina_embeddings_v4.py CHANGED
@@ -409,7 +409,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
 
     def encode_texts(
         self,
-        texts: List[str],
+        texts: Union[str, List[str]],
         task: Optional[str] = None,
         max_length: int = 8192,
         batch_size: int = 8,
@@ -422,7 +422,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         Encodes a list of texts into embeddings.
 
         Args:
-            texts:
+            texts: text or list of text strings to encode
             max_length: Maximum token length for text processing
             batch_size: Number of texts to process at once
             vector_type: Type of embedding vector to generate ('single_vector' or 'multi_vector')
@@ -446,6 +446,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             prefix=encode_kwargs.pop("prefix"),
         )
 
+        if isinstance(texts, str):
+            texts = [texts]
+
         embeddings = self._process_batches(
             data=texts,
             processor_fn=processor_fn,
@@ -474,7 +477,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
 
     def encode_images(
         self,
-        images: List[Union[str, Image.Image]],
+        images: Union[str, Image.Image, List[Union[str, Image.Image]]],
         task: Optional[str] = None,
         batch_size: int = 8,
         vector_type: Optional[str] = None,
@@ -486,7 +489,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         Encodes a list of images into embeddings.
 
         Args:
-            images:
+            images: image(s) to encode, can be PIL Image(s), URL(s), or local file path(s)
             batch_size: Number of images to process at once
             vector_type: Type of embedding vector to generate ('single_vector' or 'multi_vector')
             return_numpy: Whether to return numpy arrays instead of torch tensors
@@ -503,6 +506,11 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         )
         encode_kwargs = self._validate_encoding_params(vector_type, truncate_dim)
         task = self._validate_task(task)
+
+        # Convert single image to list
+        if isinstance(images, (str, Image.Image)):
+            images = [images]
+
         images = self._load_images_if_needed(images)
         embeddings = self._process_batches(
             data=images,
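With this change, both methods accept either a single item or a list: a bare string (for encode_texts) or a single path/URL/PIL image (for encode_images) is wrapped into a one-element list before batching. A minimal usage sketch follows; everything other than encode_texts and encode_images is an assumption taken from the jinaai/jina-embeddings-v4 model card rather than from this diff (model id, trust_remote_code loading, the "retrieval" task name, and the example file path are placeholders).

# Usage sketch; only encode_texts/encode_images come from this diff,
# the model id, task name, and file path are assumed placeholders.
from transformers import AutoModel

model = AutoModel.from_pretrained("jinaai/jina-embeddings-v4", trust_remote_code=True)

# Previously both methods required a list:
batch_emb = model.encode_texts(texts=["a blue cat", "a red dog"], task="retrieval")

# After this change a single string (or a single image path / PIL.Image) also works;
# it is converted to a one-element list internally:
single_emb = model.encode_texts(texts="a blue cat", task="retrieval")
img_emb = model.encode_images(images="photo.jpg", task="retrieval")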