joanrodai committed on
Commit
9d9ac6a
·
verified ·
1 Parent(s): ac1e92e

Upload processing_starvector.py

Browse files
Files changed (1) hide show
  1. processing_starvector.py +66 -0
processing_starvector.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers.processing_utils import ProcessorMixin
2
+ from torchvision import transforms
3
+ from torchvision.transforms.functional import InterpolationMode, pad
4
+ from transformers.feature_extraction_sequence_utils import BatchFeature
5
+
6
class SimpleStarVectorProcessor(ProcessorMixin):
    """Processor pairing a tokenizer with a fixed torchvision image pipeline.

    Images are passed through, in order: RGBA -> RGB conversion, padding to a
    square canvas, bicubic resize to ``size``, conversion to a tensor and
    per-channel normalization. Text is delegated to the wrapped tokenizer.
    """

    attributes = ["tokenizer"]  # Only include tokenizer in attributes
    valid_kwargs = ["size", "mean", "std"]  # Add other parameters as valid kwargs
    image_processor_class = "AutoImageProcessor"
    tokenizer_class = "AutoTokenizer"

    def __init__(
        self,
        tokenizer=None,  # Make tokenizer the first argument
        size=224,
        mean=None,
        std=None,
        **kwargs,
    ):
        """Build the image transform and register the tokenizer.

        Args:
            tokenizer: Tokenizer used for text inputs; forwarded to
                ``ProcessorMixin.__init__``.
            size: Target size handed to ``transforms.Resize``.
            mean: Per-channel mean for normalization. When ``None`` a
                three-value default is used.
            std: Per-channel standard deviation for normalization. When
                ``None`` a three-value default is used.
            **kwargs: Accepted for call compatibility and otherwise ignored.
        """
        # NOTE(review): these defaults look like the CLIP image statistics --
        # confirm against the checkpoint this processor is shipped with.
        if mean is None:
            mean = (0.48145466, 0.4578275, 0.40821073)
        if std is None:
            std = (0.26862954, 0.26130258, 0.27577711)

        # Store these as instance variables
        self.mean = mean
        self.std = std
        self.size = size

        self.normalize = transforms.Normalize(mean=mean, std=std)

        self.transform = transforms.Compose([
            # Only RGBA is converted; every other mode is passed through as-is.
            transforms.Lambda(lambda img: img.convert("RGB") if img.mode == "RGBA" else img),
            # Pad before resizing so the aspect ratio of the content is kept.
            transforms.Lambda(self._pad_to_square),
            transforms.Resize(size, interpolation=InterpolationMode.BICUBIC),
            transforms.ToTensor(),
            self.normalize,
        ])

        # Initialize parent class with tokenizer
        super().__init__(tokenizer=tokenizer)

    def _pad_to_square(self, img, fill=255):
        """Return ``img`` centred on a square canvas of its longer side.

        The transform pipeline built in ``__init__`` calls this helper, but
        it was not defined, so every image call failed with AttributeError.

        Args:
            img: Image exposing ``size`` as ``(width, height)``, as PIL
                images do.
            fill: Value used for the added border. NOTE(review): white (255)
                is assumed here -- confirm it matches the preprocessing the
                model was trained with.

        Returns:
            ``img`` unchanged when it is already square, otherwise a padded
            copy whose width and height are both ``max(width, height)``.
        """
        width, height = img.size
        side = max(width, height)
        if width == height:
            return img

        # Split the missing pixels between the two sides; when the difference
        # is odd the extra pixel goes to the right / bottom edge.
        left = (side - width) // 2
        top = (side - height) // 2
        right = side - width - left
        bottom = side - height - top
        return pad(img, [left, top, right, bottom], fill=fill)

    def __call__(self, images=None, text=None, **kwargs) -> BatchFeature:
        """Process images and/or text inputs.

        Args:
            images: Optional image or list/tuple of images. Each image is run
                through ``self.transform``.
            text: Optional text input(s), passed to the tokenizer.
            **kwargs: Additional arguments forwarded to the tokenizer only.

        Returns:
            A ``BatchFeature`` holding the tokenizer outputs (when ``text`` is
            given) and ``pixel_values`` (when ``images`` is given). For a
            list/tuple input ``pixel_values`` is a list with one transformed
            image per element; for a single image it is that transformed
            image.

        Raises:
            ValueError: If neither ``images`` nor ``text`` is provided.
        """
        if images is None and text is None:
            raise ValueError("You have to specify at least one of `images` or `text`.")

        image_inputs = {}
        if images is not None:
            if isinstance(images, (list, tuple)):
                images_ = [self.transform(img) for img in images]
            else:
                images_ = self.transform(images)
            image_inputs = {"pixel_values": images_}

        text_inputs = {}
        if text is not None:
            text_inputs = self.tokenizer(text, **kwargs)
        # Image entries are merged last, so they win on a key collision.
        return BatchFeature(data={**text_inputs, **image_inputs})