momergul committed on
Commit
40d80f1
·
verified ·
1 Parent(s): 7f78f1b

Upload processor_flamingo.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. processor_flamingo.py +65 -0
processor_flamingo.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import ProcessorMixin, AutoProcessor
2
+ from transformers.models.auto.processing_auto import AutoProcessor
3
+ from transformers.processing_utils import ProcessorMixin
4
+ import json
5
+ import os
6
+
7
class FlamingoProcessor(ProcessorMixin):
    """
    Custom processor that combines a tokenizer and a feature extractor.

    Text inputs are prefixed with the literal string "<image> " before being
    handed to the tokenizer; image inputs are handed to the feature extractor.
    Decoding is delegated to the tokenizer.
    """

    # Names and classes of the wrapped components, declared as class
    # attributes for ProcessorMixin.
    attributes = ["feature_extractor", "tokenizer"]
    feature_extractor_class = "AutoImageProcessor"
    tokenizer_class = "AutoTokenizer"

    def __init__(self, feature_extractor, tokenizer):
        """
        Store the two wrapped components via ``ProcessorMixin.__init__``.

        Args:
            feature_extractor: Component used to process image inputs.
            tokenizer: Component used to tokenize text and to decode ids.
        """
        super().__init__(feature_extractor, tokenizer)

    def __call__(self, text=None, images=None, **kwargs):
        """
        Main processing method that handles both text and images.

        Args:
            text: A single string, or an iterable of strings, to tokenize.
                Every string is prefixed with "<image> " first.
            images: Image input(s) passed to the feature extractor.
            **kwargs: Additional arguments forwarded unchanged to BOTH the
                tokenizer and the feature extractor.

        Returns:
            A plain dictionary holding the tokenizer outputs (if ``text`` was
            given) and the feature extractor outputs (if ``images`` was
            given). The feature extractor's "pixel_values" entry keeps its
            name; every other feature extractor key is stored as
            "image_<key>" to avoid clashing with tokenizer keys.

        Raises:
            ValueError: If both ``text`` and ``images`` are None.
        """
        if text is None and images is None:
            raise ValueError("You need to specify either text or images")

        encoding = {}

        # Process text if provided
        if text is not None:
            # isinstance (rather than an exact type comparison) so that str
            # subclasses are treated as one string instead of being iterated
            # character by character in the list branch.
            if isinstance(text, str):
                all_text = "<image> " + text
            else:
                all_text = ["<image> " + _text for _text in text]
            text_encoding = self.tokenizer(all_text, **kwargs)
            encoding.update(text_encoding)

        # Process images if provided
        if images is not None:
            # NOTE(review): the same kwargs given to the tokenizer are also
            # passed here - confirm the feature extractor tolerates
            # tokenizer-only options such as padding/truncation.
            image_encoding = self.feature_extractor(images, **kwargs)
            # Add prefix to avoid key conflicts
            for key, value in image_encoding.items():
                target_key = key if key == "pixel_values" else f"image_{key}"
                encoding[target_key] = value

        return encoding

    def batch_decode(self, *args, **kwargs):
        """
        Delegate batch decoding to the tokenizer.
        """
        return self.tokenizer.batch_decode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        """
        Delegate decoding to the tokenizer.
        """
        return self.tokenizer.decode(*args, **kwargs)
64
+
65
+