mynkchaudhry committed on
Commit
c6d3ca9
·
1 Parent(s): d246a8f

upload all the files

Browse files
Files changed (1) hide show
  1. app.py +66 -52
app.py CHANGED
@@ -1,52 +1,66 @@
1
- import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoProcessor
3
- from PIL import Image
4
- import torch
5
-
6
- # Load model and processor
7
- model = AutoModelForCausalLM.from_pretrained("mynkchaudhry/Florence-2-FT-DocVQA", force_download=True)
8
- processor = AutoProcessor.from_pretrained("mynkchaudhry/Florence-2-FT-DocVQA", force_download=True)
9
-
10
- def generate_response(image, question):
11
- try:
12
- if image.mode != "RGB":
13
- image = image.convert("RGB")
14
-
15
- inputs = processor(text=question, images=image, return_tensors="pt")
16
-
17
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
- model.to(device)
19
- inputs = {key: value.to(device) for key, value in inputs.items()}
20
-
21
- generated_ids = model.generate(
22
- input_ids=inputs["input_ids"],
23
- pixel_values=inputs["pixel_values"],
24
- max_length=1024,
25
- num_beams=3,
26
- early_stopping=True
27
- )
28
-
29
- response = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
30
- return response
31
- except Exception as e:
32
- return f"Error processing image: {e}"
33
-
34
- # Example images for demonstration (update paths as needed)
35
- examples = [
36
- ["demo.png", "what is the address in the page?"],
37
- ["demo2.jpg", "what is the date in the page?"],
38
- ["demo.png", "what is the name in the page?"]
39
- ]
40
-
41
- # Gradio interface
42
- iface = gr.Interface(
43
- fn=generate_response,
44
- inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
45
- outputs=gr.Textbox(label="Response"),
46
- examples=examples,
47
- title="Image to Text Extractor",
48
- description="Upload an image and provide a question. This tool will extract the relevant information from the image based on your question."
49
- )
50
-
51
- # Launch the interface
52
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
def prompt_user_acceptance():
    """Ask the user to consent to executing remote code from the model repo.

    Reads a single y/N answer from stdin; anything other than 'y'
    (case-insensitive, surrounding whitespace ignored) terminates the
    process with exit status 1.
    """
    response = input("Do you accept the execution of remote code from the model repository? (y/N): ").strip().lower()
    if response != 'y':
        print("You must accept to continue.")
        # raise SystemExit instead of the site-provided exit() builtin:
        # exit() is injected by the `site` module and is absent under
        # `python -S` and in frozen/embedded interpreters.
        raise SystemExit(1)
8
+
9
def main():
    """Entry point: confirm user consent, load the DocVQA model, launch the UI.

    Blocks on stdin for consent, downloads the fine-tuned Florence-2 model,
    then serves a Gradio interface until the server is stopped.
    """
    prompt_user_acceptance()

    # Heavy imports are deferred until after the consent prompt so the
    # process can exit quickly (and cheaply) when the user declines.
    import gradio as gr
    from transformers import AutoModelForCausalLM, AutoProcessor
    from PIL import Image
    import torch

    # Load model and processor.
    # NOTE(review): force_download=True re-downloads the weights on every
    # launch, bypassing the Hugging Face cache — confirm this is intentional.
    # NOTE(review): the consent prompt above talks about remote code, but
    # trust_remote_code is not passed here — verify the model actually loads
    # without it.
    model = AutoModelForCausalLM.from_pretrained("mynkchaudhry/Florence-2-FT-DocVQA", force_download=True)
    processor = AutoProcessor.from_pretrained("mynkchaudhry/Florence-2-FT-DocVQA", force_download=True)

    # Fix: choose the device and move the model ONCE at startup. The original
    # called model.to(device) inside the request handler, repeating the
    # (potentially expensive) placement on every single inference request.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    def generate_response(image, question):
        """Answer *question* about the uploaded *image*.

        Args:
            image: PIL image from the Gradio widget; converted to RGB if needed.
            question: free-text question about the image.

        Returns:
            The decoded model answer, or an "Error processing image: ..."
            string if anything goes wrong (shown directly in the UI).
        """
        try:
            if image.mode != "RGB":
                image = image.convert("RGB")

            inputs = processor(text=question, images=image, return_tensors="pt")
            # Move the input tensors to the same device as the model.
            inputs = {key: value.to(device) for key, value in inputs.items()}

            generated_ids = model.generate(
                input_ids=inputs["input_ids"],
                pixel_values=inputs["pixel_values"],
                max_length=1024,
                num_beams=3,
                early_stopping=True
            )

            response = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
            return response
        except Exception as e:
            # Surface the failure in the UI instead of crashing the server.
            return f"Error processing image: {e}"

    # Example images for demonstration (update paths as needed)
    examples = [
        ["demo.png", "what is the address in the page?"],
        ["demo2.jpg", "what is the date in the page?"],
        ["demo.png", "what is the name in the page?"]
    ]

    # Gradio interface
    iface = gr.Interface(
        fn=generate_response,
        inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
        outputs=gr.Textbox(label="Response"),
        examples=examples,
        title="Image to Text Extractor",
        description="Upload an image and provide a question. This tool will extract the relevant information from the image based on your question."
    )

    # Launch the interface
    iface.launch()


if __name__ == "__main__":
    main()