Commit 91fd99e (verified) · Parent: 9e83d6f
jupyterjazz committed

prompt-name-and-pixels (#7)

- refactor: change text type, set max pixels (8ad9fcd4614eb0e4ead82b632f4074635d2ef7fa)
README.md CHANGED

@@ -33,7 +33,7 @@ model.set_task(task='retrieval')
 
 # Generate multi-vector embeddings
 img_embeddings = model.encode_images(images=images, vector_type='multi_vector')
-text_embeddings = model.encode_texts(texts=texts, vector_type='multi_vector', text_type='passage')
+text_embeddings = model.encode_texts(texts=texts, vector_type='multi_vector', prompt_name='passage')
 
 # Example 3: Code task with single vector embeddings
 model.set_task(task='code')
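With this change, callers pass prompt_name instead of text_type. A minimal usage sketch (the repo id and example texts are assumptions here; set_task and encode_texts are taken from the README above):

    from transformers import AutoModel

    # Assumed repo id; substitute the actual model path.
    model = AutoModel.from_pretrained(
        "jinaai/jina-embeddings-v4", trust_remote_code=True
    )
    model.set_task(task='retrieval')

    # Documents use the 'passage' prompt; queries use 'query',
    # which is also the default when prompt_name is omitted.
    doc_emb = model.encode_texts(
        texts=["A photo of a cat."],
        vector_type='multi_vector',
        prompt_name='passage',
    )
    query_emb = model.encode_texts(texts=["cat picture"], vector_type='multi_vector')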
modeling_jina_embeddings_v4.py CHANGED

@@ -331,17 +331,17 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         self,
         vector_type: Optional[str] = None,
         truncate_dim: Optional[int] = None,
-        text_type: Optional[str] = None,
+        prompt_name: Optional[str] = None,
     ) -> Dict[str, Any]:
         encode_kwargs = {}
-        if text_type is not None:
-            if text_type not in PREFIX_DICT:
+        if prompt_name is not None:
+            if prompt_name not in PREFIX_DICT:
                 raise ValueError(
-                    f"Invalid text_type: {text_type}. Must be one of {list(PREFIX_DICT.keys())}."
+                    f"Invalid prompt_name: {prompt_name}. Must be one of {list(PREFIX_DICT.keys())}."
                 )
             else:
                 encode_kwargs["prefix"] = (
-                    PREFIX_DICT[text_type]
+                    PREFIX_DICT[prompt_name]
                     if self.task != TaskType.text_matching
                     else PREFIX_DICT["query"]
                 )
@@ -372,7 +372,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         vector_type: Optional[str] = None,
         return_numpy: bool = False,
         truncate_dim: Optional[int] = None,
-        text_type: Optional[str] = None,
+        prompt_name: Optional[str] = None,
     ) -> List[torch.Tensor]:
         """
         Encodes a list of texts into embeddings.
@@ -384,14 +384,14 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             vector_type: Type of embedding vector to generate ('single_vector' or 'multi_vector')
             return_numpy: Whether to return numpy arrays instead of torch tensors
             truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
-            text_type: Type of text being encoded ('query' or 'passage')
+            prompt_name: Type of text being encoded ('query' or 'passage')
 
         Returns:
             List of text embeddings as tensors or numpy arrays
         """
-        text_type = text_type or "query"
+        prompt_name = prompt_name or "query"
         encode_kwargs = self._validate_encoding_params(
-            vector_type, truncate_dim, text_type
+            vector_type, truncate_dim, prompt_name
         )
 
         processor_fn = partial(
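The behavior the first hunk encodes: a prompt_name outside PREFIX_DICT raises, and for the text_matching task the 'query' prefix is applied regardless of prompt_name. A standalone sketch of that selection logic (the PREFIX_DICT values here are illustrative assumptions; the real constants live elsewhere in this file):

    from typing import Optional

    PREFIX_DICT = {"query": "Query:", "passage": "Passage:"}  # assumed values

    def select_prefix(task: str, prompt_name: Optional[str]) -> Optional[str]:
        if prompt_name is None:
            return None  # no prefix is added to encode_kwargs
        if prompt_name not in PREFIX_DICT:
            raise ValueError(
                f"Invalid prompt_name: {prompt_name}. "
                f"Must be one of {list(PREFIX_DICT.keys())}."
            )
        # text_matching is symmetric, so both sides get the query prefix.
        return PREFIX_DICT["query"] if task == "text_matching" else PREFIX_DICT[prompt_name]

    assert select_prefix("retrieval", "passage") == "Passage:"
    assert select_prefix("text_matching", "passage") == "Query:"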
preprocessor_config.json CHANGED

@@ -14,7 +14,7 @@
     0.26130258,
     0.27577711
   ],
-  "max_pixels": 12845056,
+  "max_pixels": 602112,
   "merge_size": 2,
   "min_pixels": 3136,
   "patch_size": 14,
@@ -22,7 +22,7 @@
   "resample": 3,
   "rescale_factor": 0.00392156862745098,
   "size": {
-    "longest_edge": 12845056,
+    "longest_edge": 602112,
     "shortest_edge": 3136
   },
   "temporal_patch_size": 2,