support-max-pixels (#6)
feat: configure max pixels (509c5332bacf540daad3c8fcead31c9abf2e2452)
- README.md +2 -2
- modeling_jina_embeddings_v4.py +40 -0
README.md
CHANGED
@@ -24,8 +24,8 @@ images = [Image.open(path) for path in image_paths]
 # Example 1: Text matching task with single vector embeddings
 model.set_task(task='text-matching')
 
-# Generate embeddings with dimension truncation (256)
-img_embeddings = model.encode_images(images=images, truncate_dim=256)
+# Generate embeddings with dimension truncation (256), decrease max_pixels
+img_embeddings = model.encode_images(images=images, truncate_dim=256, max_pixels=602112)
 text_embeddings = model.encode_texts(texts=texts, truncate_dim=256, max_length=512)
 
 # Example 2: Retrieval task with multi-vector embeddings
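The max_pixels=602112 value in the updated example is not arbitrary: Qwen2.5-VL-style image processors count their pixel budget in 28x28 patches, and 602112 = 768 * 28 * 28, i.e. a budget of 768 patches per image. A minimal sketch of that arithmetic (the 28-pixel patch convention is an assumption about the underlying Qwen2.5-VL processor, not something stated in this diff):

    # Sketch: convert a patch budget into a max_pixels value, assuming
    # the Qwen2.5-VL convention of 28x28-pixel image patches.
    PATCH_SIZE = 28

    def pixels_for_patches(num_patches: int) -> int:
        # Pixel budget that admits at most num_patches 28x28 patches.
        return num_patches * PATCH_SIZE * PATCH_SIZE

    assert pixels_for_patches(768) == 602112  # the value in the README example

Lowering max_pixels trades image detail for speed and memory, since fewer patches means fewer visual tokens per image.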
modeling_jina_embeddings_v4.py
CHANGED
@@ -374,6 +374,21 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         truncate_dim: Optional[int] = None,
         text_type: Optional[str] = None,
     ) -> List[torch.Tensor]:
+        """
+        Encodes a list of texts into embeddings.
+
+        Args:
+            texts: List of text strings to encode
+            max_length: Maximum token length for text processing
+            batch_size: Number of texts to process at once
+            vector_type: Type of embedding vector to generate ('single_vector' or 'multi_vector')
+            return_numpy: Whether to return numpy arrays instead of torch tensors
+            truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
+            text_type: Type of text being encoded ('query' or 'passage')
+
+        Returns:
+            List of text embeddings as tensors or numpy arrays
+        """
         text_type = text_type or "query"
         encode_kwargs = self._validate_encoding_params(
             vector_type, truncate_dim, text_type
@@ -404,7 +419,26 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         vector_type: Optional[str] = None,
         return_numpy: bool = False,
         truncate_dim: Optional[int] = None,
+        max_pixels: Optional[int] = None,
     ) -> List[torch.Tensor]:
+        """
+        Encodes a list of images into embeddings.
+
+        Args:
+            images: List of PIL images to encode
+            batch_size: Number of images to process at once
+            vector_type: Type of embedding vector to generate ('single_vector' or 'multi_vector')
+            return_numpy: Whether to return numpy arrays instead of torch tensors
+            truncate_dim: Dimension to truncate embeddings to (128, 256, 512, or 1024)
+            max_pixels: Maximum number of pixels to process per image
+
+        Returns:
+            List of image embeddings as tensors or numpy arrays
+        """
+        if max_pixels:
+            default_max_pixels = self.processor.image_processor.max_pixels
+            self.processor.image_processor.max_pixels = max_pixels  # change during encoding
+
         encode_kwargs = self._validate_encoding_params(vector_type, truncate_dim)
 
         is_single = len(images) == 1
@@ -417,6 +451,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
             **encode_kwargs,
         )
 
+        if max_pixels:
+            self.processor.image_processor.max_pixels = default_max_pixels
+
         return embeddings[0] if is_single else embeddings
 
     @classmethod
@@ -426,6 +463,9 @@ class JinaEmbeddingsV4Model(Qwen2_5_VLForConditionalGeneration):
         *args,
         **kwargs,
     ):
+        """
+        Loads a pretrained model and configures it with the appropriate task adapter (`retrieval` by default).
+        """
         if "torch_dtype" not in kwargs:
             kwargs["torch_dtype"] = "auto"
 
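One note on the save/restore pattern added to encode_images: default_max_pixels is only restored after a successful encode, so an exception raised mid-batch would leave the override in place, and `if max_pixels:` also treats an explicit 0 as "no override". A hedged sketch of a more defensive variant, using a hypothetical override_max_pixels helper that is not part of this commit (method names in the trailing comment are illustrative):

    from contextlib import contextmanager
    from typing import Iterator, Optional

    @contextmanager
    def override_max_pixels(image_processor, max_pixels: Optional[int]) -> Iterator[None]:
        # Temporarily override image_processor.max_pixels and restore the
        # default even if encoding raises. Sketch only, not repo code.
        if max_pixels is None:
            yield
            return
        default = image_processor.max_pixels
        image_processor.max_pixels = max_pixels
        try:
            yield
        finally:
            image_processor.max_pixels = default  # always restored

    # Inside encode_images this would be used roughly as:
    #     with override_max_pixels(self.processor.image_processor, max_pixels):
    #         embeddings = self._encode_image_batches(images, ...)

Checking `max_pixels is None` rather than truthiness also keeps the helper's behavior unambiguous for edge values.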