shambhuDATA committed
Commit 921a194 · verified · 1 Parent(s): e1f1f86

Create app.py

Files changed (1)
  1. app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
+ from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
+ from PIL import Image
+ import requests
+
+ MODEL_NAME = "allenai/MolmoE-1B-0924"
+
+ # load the processor
+ processor = AutoProcessor.from_pretrained(
+     MODEL_NAME,
+     trust_remote_code=True,
+     torch_dtype='auto',
+     device_map='auto'
+ )
+
+ # load the model
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_NAME,
+     trust_remote_code=True,
+     torch_dtype='auto',
+     device_map='auto'
+ )
+
+ # process the image and text
+ inputs = processor.process(
+     images=[Image.open(requests.get("https://picsum.photos/id/237/536/354", stream=True).raw)],
+     text="Describe this image."
+ )
+
+ # move inputs to the correct device and make a batch of size 1
+ inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
+
+ # generate output; maximum 200 new tokens; stop generation when <|endoftext|> is generated
+ output = model.generate_from_batch(
+     inputs,
+     GenerationConfig(max_new_tokens=200, stop_strings="<|endoftext|>"),
+     tokenizer=processor.tokenizer
+ )
+
+ # keep only the generated tokens and decode them to text
+ generated_tokens = output[0, inputs['input_ids'].size(1):]
+ generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
+
+ # print the generated text
+ print(generated_text)
+
+ # >>> This photograph captures a small black puppy, likely a Labrador or a similar breed,
+ # sitting attentively on a weathered wooden deck. The deck, composed of three...
+
+
+ # import cv2
+
+
+ # class Solution():
+ #     def __init__(self, prompt):
+ #         self.prompt = prompt
+ #         self.output_dir = None
+
+ #     # read an .mp4 file and grab one frame at a fixed interval (in seconds)
+ #     def read_frame(self, file, interval=1):
+ #         video = cv2.VideoCapture(file)
+ #         fps = video.get(cv2.CAP_PROP_FPS)
+
+ #         # e.g. fps = 24 frames/sec and interval = 1 sec, so sample every 24th frame
+ #         frame_interval = int(fps * interval)
+ #         frame_index = 0
+ #         while True:
+ #             success, frame = video.read()
+ #             if not success:
+ #                 break
+
+ #             if frame_index % frame_interval == 0:
+ #                 # to do: process this frame (see the sketch after this diff)
+ #                 pass
+ #             frame_index += 1
+
+ #     def find(self, input_message):
+ #         # to do
+ #         pass
+
+
+ # to do: read an .mp4 file and collect frames spaced N seconds apart
+
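
The commented-out block above sketches reading an .mp4 and sampling one frame every N seconds. A minimal runnable sketch of that idea, assuming OpenCV (cv2) is installed; the sample_frames name, the fps fallback, and the idea of passing each sampled frame to the Molmo pipeline above are illustrative assumptions, not part of this commit:

import cv2

def sample_frames(path, interval=1):
    """Return a list of frames taken every `interval` seconds from the video at `path`."""
    video = cv2.VideoCapture(path)
    fps = video.get(cv2.CAP_PROP_FPS) or 24  # assumption: fall back to 24 fps if the container reports 0
    frame_interval = max(1, int(fps * interval))  # e.g. 24 fps * 1 s -> keep every 24th frame

    frames = []
    frame_index = 0
    while True:
        success, frame = video.read()
        if not success:
            break
        if frame_index % frame_interval == 0:
            # each sampled frame could then be fed to the image-description pipeline above
            frames.append(frame)
        frame_index += 1

    video.release()
    return frames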