jinaai
/

jina-embeddings-v4

@@ -155,7 +155,7 @@ from transformers import AutoModel
 import torch
 # Initialize the model
-model = AutoModel.from_pretrained("jinaai/jina-embeddings-v4", trust_remote_code=True)
 model.to("cuda")

 import torch
 # Initialize the model
+model = AutoModel.from_pretrained("jinaai/jina-embeddings-v4", trust_remote_code=True, torch_dtype=torch.float16)
 model.to("cuda")

custom_st.py CHANGED Viewed

@@ -1,3 +1,5 @@
 from io import BytesIO
 from pathlib import Path
 from typing import Any, Dict, List, Literal, Optional, Union
@@ -104,7 +106,10 @@ class Transformer(nn.Module):
         return encoding
     def forward(
-        self, features: Dict[str, torch.Tensor], task: Optional[str] = None, truncate_dim: Optional[int] = None
     ) -> Dict[str, torch.Tensor]:
         self.model.eval()
@@ -138,8 +143,10 @@ class Transformer(nn.Module):
                         **text_batch, task_label=task
                     ).single_vec_emb
                     if truncate_dim:
-                        text_embeddings = text_embeddings[:, : truncate_dim]
-                        text_embeddings = torch.nn.functional.normalize(text_embeddings, p=2, dim=-1)
                 for i, embedding in enumerate(text_embeddings):
                     all_embeddings.append((text_indices[i], embedding))
@@ -156,8 +163,10 @@ class Transformer(nn.Module):
                         **image_batch, task_label=task
                     ).single_vec_emb
                     if truncate_dim:
-                        img_embeddings = img_embeddings[:, : truncate_dim]
-                        img_embeddings = torch.nn.functional.normalize(img_embeddings, p=2, dim=-1)
                 for i, embedding in enumerate(img_embeddings):
                     all_embeddings.append((image_indices[i], embedding))
@@ -170,3 +179,7 @@ class Transformer(nn.Module):
         features["sentence_embedding"] = combined_embeddings
         return features

+import json
+import os
 from io import BytesIO
 from pathlib import Path
 from typing import Any, Dict, List, Literal, Optional, Union
         return encoding
     def forward(
+        self,
+        features: Dict[str, torch.Tensor],
+        task: Optional[str] = None,
+        truncate_dim: Optional[int] = None,
     ) -> Dict[str, torch.Tensor]:
         self.model.eval()
                         **text_batch, task_label=task
                     ).single_vec_emb
                     if truncate_dim:
+                        text_embeddings = text_embeddings[:, :truncate_dim]
+                        text_embeddings = torch.nn.functional.normalize(
+                            text_embeddings, p=2, dim=-1
+                        )
                 for i, embedding in enumerate(text_embeddings):
                     all_embeddings.append((text_indices[i], embedding))
                         **image_batch, task_label=task
                     ).single_vec_emb
                     if truncate_dim:
+                        img_embeddings = img_embeddings[:, :truncate_dim]
+                        img_embeddings = torch.nn.functional.normalize(
+                            img_embeddings, p=2, dim=-1
+                        )
                 for i, embedding in enumerate(img_embeddings):
                     all_embeddings.append((image_indices[i], embedding))
         features["sentence_embedding"] = combined_embeddings
         return features
+    @classmethod
+    def load(cls, input_path: str) -> "Transformer":
+        return cls(model_name_or_path=input_path)

modeling_jina_embeddings_v4.py CHANGED Viewed

@@ -242,7 +242,6 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             pooled_output = masked_hidden_states.sum(dim=1) / image_mask.sum(
                 dim=1, keepdim=True
             )
         else:  # got query text
             pooled_output = torch.sum(
                 hidden_states * attention_mask.unsqueeze(-1), dim=1
@@ -332,7 +331,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             collate_fn=processor_fn,
         )
         if return_multivector and len(data) > 1:
-            assert not return_numpy, "`return_numpy` is not supported when `return_multivector=True` and more than one data is encoded"
         results = []
         self.eval()
         for batch in tqdm(dataloader, desc=desc):
@@ -346,10 +347,12 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
                         embeddings = embeddings.single_vec_emb
                         if truncate_dim is not None:
                             embeddings = embeddings[:, :truncate_dim]
-                            embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=-1)
                     else:
                         embeddings = embeddings.multi_vec_emb
                     if return_multivector and not return_numpy:
                         valid_tokens = batch["attention_mask"].bool()
                         embeddings = [
@@ -436,7 +439,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             List of text embeddings as tensors or numpy arrays when encoding multiple texts, or single text embedding as tensor when encoding a single text
         """
         prompt_name = prompt_name or "query"
-        encode_kwargs = self._validate_encoding_params(truncate_dim=truncate_dim, prompt_name=prompt_name)
         task = self._validate_task(task)
@@ -451,9 +456,11 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         # If return_multivector is True and encoding multiple texts, ignore return_numpy
         if return_multivector and return_list and len(texts) > 1:
             if return_numpy:
-                print("Warning: `return_numpy` is ignored when `return_multivector=True` and `len(texts) > 1`")
             return_numpy = False
         if isinstance(texts, str):
             texts = [texts]
@@ -468,7 +475,7 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             **encode_kwargs,
         )
-        return embeddings if return_list else embeddings[0]
     def _load_images_if_needed(
         self, images: List[Union[str, Image.Image]]
@@ -515,19 +522,21 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             )
         encode_kwargs = self._validate_encoding_params(truncate_dim=truncate_dim)
         task = self._validate_task(task)
         return_list = isinstance(images, list)
         # If return_multivector is True and encoding multiple images, ignore return_numpy
         if return_multivector and return_list and len(images) > 1:
             if return_numpy:
-                print("Warning: `return_numpy` is ignored when `return_multivector=True` and `len(images) > 1`")
             return_numpy = False
         # Convert single image to list
         if isinstance(images, (str, Image.Image)):
             images = [images]
         images = self._load_images_if_needed(images)
         embeddings = self._process_batches(
             data=images,
@@ -588,18 +597,12 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             config=lora_config,
         )
-        @property
-        def task(self):
             return self.model.task
-        @task.setter
-        def task(self, value):
             self.model.task = value
-        peft_model.task = property(task.fget, task.fset)
-        peft_model.__class__.task = property(
-            lambda self: self.model.task,
-            lambda self, value: setattr(self.model, "task", value),
-        )
         return peft_model

             pooled_output = masked_hidden_states.sum(dim=1) / image_mask.sum(
                 dim=1, keepdim=True
             )
         else:  # got query text
             pooled_output = torch.sum(
                 hidden_states * attention_mask.unsqueeze(-1), dim=1
             collate_fn=processor_fn,
         )
         if return_multivector and len(data) > 1:
+            assert (
+                not return_numpy
+            ), "`return_numpy` is not supported when `return_multivector=True` and more than one data is encoded"
         results = []
         self.eval()
         for batch in tqdm(dataloader, desc=desc):
                         embeddings = embeddings.single_vec_emb
                         if truncate_dim is not None:
                             embeddings = embeddings[:, :truncate_dim]
+                            embeddings = torch.nn.functional.normalize(
+                                embeddings, p=2, dim=-1
+                            )
                     else:
                         embeddings = embeddings.multi_vec_emb
                     if return_multivector and not return_numpy:
                         valid_tokens = batch["attention_mask"].bool()
                         embeddings = [
             List of text embeddings as tensors or numpy arrays when encoding multiple texts, or single text embedding as tensor when encoding a single text
         """
         prompt_name = prompt_name or "query"
+        encode_kwargs = self._validate_encoding_params(
+            truncate_dim=truncate_dim, prompt_name=prompt_name
+        )
         task = self._validate_task(task)
         # If return_multivector is True and encoding multiple texts, ignore return_numpy
         if return_multivector and return_list and len(texts) > 1:
             if return_numpy:
+                print(
+                    "Warning: `return_numpy` is ignored when `return_multivector=True` and `len(texts) > 1`"
+                )
             return_numpy = False
         if isinstance(texts, str):
             texts = [texts]
             **encode_kwargs,
         )
+        return embeddings if return_list else embeddings[0]
     def _load_images_if_needed(
         self, images: List[Union[str, Image.Image]]
             )
         encode_kwargs = self._validate_encoding_params(truncate_dim=truncate_dim)
         task = self._validate_task(task)
         return_list = isinstance(images, list)
         # If return_multivector is True and encoding multiple images, ignore return_numpy
         if return_multivector and return_list and len(images) > 1:
             if return_numpy:
+                print(
+                    "Warning: `return_numpy` is ignored when `return_multivector=True` and `len(images) > 1`"
+                )
             return_numpy = False
         # Convert single image to list
         if isinstance(images, (str, Image.Image)):
             images = [images]
         images = self._load_images_if_needed(images)
         embeddings = self._process_batches(
             data=images,
             config=lora_config,
         )
+        def task_getter(self):
             return self.model.task
+        def task_setter(self, value):
             self.model.task = value
+        peft_model.__class__.task = property(task_getter, task_setter)
         return peft_model