jupyterjazz committed
Commit 9e83d6f · verified · 1 parent: baee517

support-max-pixels (#6)


- feat: configure max pixels (509c5332bacf540daad3c8fcead31c9abf2e2452)

Files changed (2):
  1. README.md (+2 -2)
  2. modeling_jina_embeddings_v4.py (+40 -0)
README.md CHANGED
@@ -24,8 +24,8 @@ images = [Image.open(path) for path in image_paths]
 # Example 1: Text matching task with single vector embeddings
 model.set_task(task='text-matching')
 
-# Generate embeddings with dimension truncation (256)
-img_embeddings = model.encode_images(images=images, truncate_dim=256)
+# Generate embeddings with dimension truncation (256), decrease max_pixels
+img_embeddings = model.encode_images(images=images, truncate_dim=256, max_pixels=602112)
 text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, max_length=512)
 
 # Example 2: Retrieval task with multi-vector embeddings
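
A note on the example value 602112: this model extends Qwen2_5_VLForConditionalGeneration, and Qwen2-VL-family image processors express max_pixels in raw pixels while resizing images in 28x28-pixel patch units. The sketch below is a guess at the derivation, not something stated in the commit; under that patch convention, 602112 corresponds to a budget of 768 patches:

# Sketch only: a plausible derivation of the README's max_pixels value.
# Assumes the Qwen2-VL processor convention of 28x28-pixel visual patches;
# the 768-patch budget is inferred, not stated in the commit.
PATCH_SIDE = 28
patch_budget = 768
max_pixels = patch_budget * PATCH_SIDE * PATCH_SIDE
assert max_pixels == 602112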
modeling_jina_embeddings_v4.py CHANGED
@@ -374,6 +374,21 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         truncate_dim: Optional[int] = None,
         text_type: Optional[str] = None,
     ) -> List[torch.Tensor]:
+        """
+        Encodes a list of texts into embeddings.
+
+        Args:
+            texts: List of text strings to encode
+            max_length: Maximum token length for text processing
+            batch_size: Number of texts to process at once
+            vector_type: Type of embedding vector to generate ('single_vector' or 'multi_vector')
+            return_numpy: Whether to return numpy arrays instead of torch tensors
+            truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
+            text_type: Type of text being encoded ('query' or 'passage')
+
+        Returns:
+            List of text embeddings as tensors or numpy arrays
+        """
         text_type = text_type or "query"
         encode_kwargs = self._validate_encoding_params(
             vector_type, truncate_dim, text_type
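
The docstring added above documents the encode_texts parameters; here is a hypothetical call exercising them. The model object comes from the README's setup, and the argument values are illustrative, not taken from the commit:

# Hypothetical usage of the parameters documented in the new docstring;
# argument values are illustrative.
query_embeddings = model.encode_texts(
    texts=["What is the capital of France?"],
    text_type="query",            # or "passage"
    vector_type="single_vector",  # or "multi_vector"
    truncate_dim=256,             # one of 128, 256, 512, 1024
    max_length=512,
)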
@@ -404,7 +419,26 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         vector_type: Optional[str] = None,
         return_numpy: bool = False,
         truncate_dim: Optional[int] = None,
+        max_pixels: Optional[int] = None,
     ) -> List[torch.Tensor]:
+        """
+        Encodes a list of images into embeddings.
+
+        Args:
+            images: List of PIL images to encode
+            batch_size: Number of images to process at once
+            vector_type: Type of embedding vector to generate ('single_vector' or 'multi_vector')
+            return_numpy: Whether to return numpy arrays instead of torch tensors
+            truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
+            max_pixels: Maximum number of pixels to process per image
+
+        Returns:
+            List of image embeddings as tensors or numpy arrays
+        """
+        if max_pixels:
+            default_max_pixels = self.processor.image_processor.max_pixels
+            self.processor.image_processor.max_pixels = max_pixels  # change during encoding
+
         encode_kwargs = self._validate_encoding_params(vector_type, truncate_dim)
 
         is_single = len(images) == 1
@@ -417,6 +451,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             **encode_kwargs,
         )
 
+        if max_pixels:
+            self.processor.image_processor.max_pixels = default_max_pixels
+
         return embeddings[0] if is_single else embeddings
 
     @classmethod
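
Together, the two hunks above temporarily override max_pixels on the shared processor and restore the default once encoding finishes. One caveat: if encoding raises partway through, the restore never runs and the override leaks into later calls. A minimal sketch of a try/finally variant, not part of this commit, that guarantees restoration:

# Sketch (not in this commit): a context manager that guarantees the
# processor's max_pixels is restored even if encoding raises.
from contextlib import contextmanager

@contextmanager
def temporary_max_pixels(processor, max_pixels):
    default = processor.image_processor.max_pixels
    processor.image_processor.max_pixels = max_pixels
    try:
        yield
    finally:
        processor.image_processor.max_pixels = default

# Hypothetical use inside encode_images (the encoding call is elided):
#     with temporary_max_pixels(self.processor, max_pixels):
#         ...  # run the batched encoding here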
@@ -426,6 +463,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         *args,
         **kwargs,
     ):
+        """
+        Loads a pretrained model and configures it with the appropriate task adapter (`retrieval` by default).
+        """
         if "torch_dtype" not in kwargs:
             kwargs["torch_dtype"] = "auto"
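
Per the docstring just added to from_pretrained, loading defaults to the retrieval adapter. A hypothetical end-to-end load follows; the repo id and the trust_remote_code flag are assumptions (custom modeling code usually requires it), not stated in this commit:

# Hypothetical load flow; the repo id and trust_remote_code are assumptions.
from transformers import AutoModel

model = AutoModel.from_pretrained(
    "jinaai/jina-embeddings-v4",  # assumed repo id
    trust_remote_code=True,       # custom modeling file, so likely required
)
# Defaults to the `retrieval` adapter per the docstring; switch as in the README:
model.set_task(task="text-matching")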