mynkchaudhry committed on
Commit
c6d3ca9
·
1 Parent(s): d246a8f

upload all the files

Browse files
Files changed (1) hide show
  1. app.py +66 -52
app.py CHANGED
@@ -1,52 +1,66 @@
1
- import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoProcessor
3
- from PIL import Image
4
- import torch
5
-
6
- # Load model and processor
7
- model = AutoModelForCausalLM.from_pretrained("mynkchaudhry/Florence-2-FT-DocVQA", force_download=True)
8
- processor = AutoProcessor.from_pretrained("mynkchaudhry/Florence-2-FT-DocVQA", force_download=True)
9
-
10
- def generate_response(image, question):
11
- try:
12
- if image.mode != "RGB":
13
- image = image.convert("RGB")
14
-
15
- inputs = processor(text=question, images=image, return_tensors="pt")
16
-
17
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
- model.to(device)
19
- inputs = {key: value.to(device) for key, value in inputs.items()}
20
-
21
- generated_ids = model.generate(
22
- input_ids=inputs["input_ids"],
23
- pixel_values=inputs["pixel_values"],
24
- max_length=1024,
25
- num_beams=3,
26
- early_stopping=True
27
- )
28
-
29
- response = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
30
- return response
31
- except Exception as e:
32
- return f"Error processing image: {e}"
33
-
34
- # Example images for demonstration (update paths as needed)
35
- examples = [
36
- ["demo.png", "what is the address in the page?"],
37
- ["demo2.jpg", "what is the date in the page?"],
38
- ["demo.png", "what is the name in the page?"]
39
- ]
40
-
41
- # Gradio interface
42
- iface = gr.Interface(
43
- fn=generate_response,
44
- inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
45
- outputs=gr.Textbox(label="Response"),
46
- examples=examples,
47
- title="Image to Text Extractor",
48
- description="Upload an image and provide a question. This tool will extract the relevant information from the image based on your question."
49
- )
50
-
51
- # Launch the interface
52
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
def prompt_user_acceptance():
    """Ask the user to consent to executing remote code from the model repo.

    Reads a single y/N answer from stdin; anything other than 'y'
    (case-insensitive, surrounding whitespace ignored) terminates the
    process with exit status 1.
    """
    response = input("Do you accept the execution of remote code from the model repository? (y/N): ").strip().lower()
    if response != 'y':
        print("You must accept to continue.")
        # raise SystemExit instead of the site-provided exit() builtin:
        # exit() is injected by the `site` module and is absent under
        # `python -S` and in frozen/embedded interpreters.
        raise SystemExit(1)
8
+
9
def main():
    """Entry point: confirm user consent, load the DocVQA model, launch the UI.

    Blocks on stdin for consent, downloads the fine-tuned Florence-2 model,
    then serves a Gradio interface until the server is stopped.
    """
    prompt_user_acceptance()

    # Heavy imports are deferred until after the consent prompt so the
    # process can exit quickly (and cheaply) when the user declines.
    import gradio as gr
    from transformers import AutoModelForCausalLM, AutoProcessor
    from PIL import Image
    import torch

    # Load model and processor.
    # NOTE(review): force_download=True re-downloads the weights on every
    # launch, bypassing the Hugging Face cache — confirm this is intentional.
    # NOTE(review): the consent prompt above talks about remote code, but
    # trust_remote_code is not passed here — verify the model actually loads
    # without it.
    model = AutoModelForCausalLM.from_pretrained("mynkchaudhry/Florence-2-FT-DocVQA", force_download=True)
    processor = AutoProcessor.from_pretrained("mynkchaudhry/Florence-2-FT-DocVQA", force_download=True)

    # Fix: choose the device and move the model ONCE at startup. The original
    # called model.to(device) inside the request handler, repeating the
    # (potentially expensive) placement on every single inference request.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    def generate_response(image, question):
        """Answer *question* about the uploaded *image*.

        Args:
            image: PIL image from the Gradio widget; converted to RGB if needed.
            question: free-text question about the image.

        Returns:
            The decoded model answer, or an "Error processing image: ..."
            string if anything goes wrong (shown directly in the UI).
        """
        try:
            if image.mode != "RGB":
                image = image.convert("RGB")

            inputs = processor(text=question, images=image, return_tensors="pt")
            # Move the input tensors to the same device as the model.
            inputs = {key: value.to(device) for key, value in inputs.items()}

            generated_ids = model.generate(
                input_ids=inputs["input_ids"],
                pixel_values=inputs["pixel_values"],
                max_length=1024,
                num_beams=3,
                early_stopping=True
            )

            response = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
            return response
        except Exception as e:
            # Surface the failure in the UI instead of crashing the server.
            return f"Error processing image: {e}"

    # Example images for demonstration (update paths as needed)
    examples = [
        ["demo.png", "what is the address in the page?"],
        ["demo2.jpg", "what is the date in the page?"],
        ["demo.png", "what is the name in the page?"]
    ]

    # Gradio interface
    iface = gr.Interface(
        fn=generate_response,
        inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
        outputs=gr.Textbox(label="Response"),
        examples=examples,
        title="Image to Text Extractor",
        description="Upload an image and provide a question. This tool will extract the relevant information from the image based on your question."
    )

    # Launch the interface
    iface.launch()


if __name__ == "__main__":
    main()