mynkchaudhry committed on
Commit
b271d00
·
1 Parent(s): beb129b

upload all the files

Browse files
Files changed (1) hide show
  1. app.py +47 -60
app.py CHANGED
@@ -1,65 +1,52 @@
1
- import os
2
  import gradio as gr
3
  from transformers import AutoModelForCausalLM, AutoProcessor
4
  from PIL import Image
5
  import torch
6
 
7
- def prompt_user_acceptance():
8
- # Check environment variable
9
- if os.getenv('TRUST_REMOTE_CODE') != 'true':
10
- print("You must accept to continue. Set the environment variable TRUST_REMOTE_CODE=true.")
11
- exit(1)
12
-
13
- def main():
14
- prompt_user_acceptance()
15
-
16
- # Load model and processor
17
- model = AutoModelForCausalLM.from_pretrained("mynkchaudhry/Florence-2-FT-DocVQA", force_download=True)
18
- processor = AutoProcessor.from_pretrained("mynkchaudhry/Florence-2-FT-DocVQA", force_download=True)
19
-
20
- def generate_response(image, question):
21
- try:
22
- if image.mode != "RGB":
23
- image = image.convert("RGB")
24
-
25
- inputs = processor(text=question, images=image, return_tensors="pt")
26
-
27
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
28
- model.to(device)
29
- inputs = {key: value.to(device) for key, value in inputs.items()}
30
-
31
- generated_ids = model.generate(
32
- input_ids=inputs["input_ids"],
33
- pixel_values=inputs["pixel_values"],
34
- max_length=1024,
35
- num_beams=3,
36
- early_stopping=True
37
- )
38
-
39
- response = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
40
- return response
41
- except Exception as e:
42
- return f"Error processing image: {e}"
43
-
44
- # Example images for demonstration (update paths as needed)
45
- examples = [
46
- ["demo.png", "what is the address in the page?"],
47
- ["demo2.jpg", "what is the date in the page?"],
48
- ["demo.png", "what is the name in the page?"]
49
- ]
50
-
51
- # Gradio interface
52
- iface = gr.Interface(
53
- fn=generate_response,
54
- inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
55
- outputs=gr.Textbox(label="Response"),
56
- examples=examples,
57
- title="Image to Text Extractor",
58
- description="Upload an image and provide a question. This tool will extract the relevant information from the image based on your question."
59
- )
60
-
61
- # Launch the interface
62
- iface.launch()
63
-
64
- if __name__ == "__main__":
65
- main()
 
 
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoProcessor
3
  from PIL import Image
4
  import torch
5
 
6
# Load the fine-tuned Florence-2 model and its processor once at import time.
# trust_remote_code=True is required because Florence-2 ships custom modeling
# code in the Hub repo — NOTE(review): this executes repo code; acceptable only
# because the repo is the author's own.
# force_download=True was removed: it re-downloaded the full checkpoint on
# every startup; the default behavior downloads once and reuses the local cache.
MODEL_ID = "mynkchaudhry/Florence-2-FT-DocVQA"
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
9
+
10
def generate_response(image, question):
    """Answer *question* about *image* with the fine-tuned Florence-2 model.

    Parameters
    ----------
    image : PIL.Image.Image or None
        Uploaded document image (any mode; converted to RGB). Gradio passes
        ``None`` when the user submits without uploading an image.
    question : str
        Natural-language question about the image contents.

    Returns
    -------
    str
        The decoded model answer, or a human-readable error message —
        errors are returned rather than raised so the Gradio UI displays them.
    """
    # Guard: without this, image.mode on None raises a cryptic AttributeError.
    if image is None:
        return "Error processing image: no image provided"
    try:
        if image.mode != "RGB":
            image = image.convert("RGB")

        inputs = processor(text=question, images=image, return_tensors="pt")

        # Move model and inputs to GPU when available; a no-op after the
        # first call since the model stays on the chosen device.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        inputs = {key: value.to(device) for key, value in inputs.items()}

        # inference_mode disables autograd bookkeeping — faster and lighter
        # than running generation with gradients enabled.
        with torch.inference_mode():
            generated_ids = model.generate(
                input_ids=inputs["input_ids"],
                pixel_values=inputs["pixel_values"],
                max_length=1024,
                num_beams=3,
                early_stopping=True,
            )

        return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    except Exception as e:  # surface any failure as text for the UI
        return f"Error processing image: {e}"
33
+
34
# Example (image path, question) pairs shown under the input widgets.
# NOTE(review): demo.png / demo2.jpg must exist next to app.py — confirm they
# are committed alongside this file.
examples = [
    ["demo.png", "what is the address in the page?"],
    ["demo2.jpg", "what is the date in the page?"],
    ["demo.png", "what is the name in the page?"],
]

# Gradio interface: image + question in, extracted answer out.
iface = gr.Interface(
    fn=generate_response,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Response"),
    examples=examples,
    title="Image to Text Extractor",
    description="Upload an image and provide a question. This tool will extract the relevant information from the image based on your question."
)

# Guard the launch so importing this module (e.g. from tests or another app)
# does not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()