apjanco committed
Commit 87f8a5f · 1 Parent(s): cda0268

change model options

Files changed (3)
  1. Caracal.jpg +0 -0
  2. README.md +3 -3
  3. app.py +9 -51
Caracal.jpg DELETED
Binary file (133 kB)
 
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-title: Caracal
-emoji: 🔥
+title: Fantastic Futures Endpoint
+emoji: 🚀
 colorFrom: indigo
 colorTo: pink
 sdk: gradio
@@ -8,7 +8,7 @@ sdk_version: 5.9.1
 app_file: app.py
 pinned: false
 license: mit
-short_description: A simple app for doing HTR with various models.
+short_description: A simple app for a workshop in December 2025.
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
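For reference, the Space front matter after this commit reads as follows. This is assembled from the context lines and additions in the hunks above; the sdk_version value is taken from the second hunk header and is not itself changed by the commit.

    ---
    title: Fantastic Futures Endpoint
    emoji: 🚀
    colorFrom: indigo
    colorTo: pink
    sdk: gradio
    sdk_version: 5.9.1
    app_file: app.py
    pinned: false
    license: mit
    short_description: A simple app for a workshop in December 2025.
    ---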
app.py CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
 import spaces
-from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, Qwen2_5_VLForConditionalGeneration
+from transformers import AutoModelForImageTextToText, AutoProcessor, Qwen2_5_VLForConditionalGeneration
 from qwen_vl_utils import process_vision_info
 import torch
 from PIL import Image
@@ -53,8 +53,8 @@ models = {
         "Qwen/Qwen2.5-VL-7B-Instruct", trust_remote_code=True, torch_dtype="auto"
     ).cuda().eval(),
 
-    "medieval-data/qwen2.5-vl-old-church-slavonic": Qwen2_5_VLForConditionalGeneration.from_pretrained(
-        "medieval-data/qwen2.5-vl-old-church-slavonic", trust_remote_code=True, torch_dtype="auto"
+    "nanonets/Nanonets-OCR-s": AutoModelForImageTextToText.from_pretrained(
+        "nanonets/Nanonets-OCR-s", trust_remote_code=True, torch_dtype="auto"
     ).cuda().eval()
 }
 
@@ -62,9 +62,8 @@ processors = {
     "Qwen/Qwen2.5-VL-7B-Instruct": AutoProcessor.from_pretrained(
         "Qwen/Qwen2.5-VL-7B-Instruct", trust_remote_code=True
     ),
-
-    "medieval-data/qwen2.5-vl-old-church-slavonic": AutoProcessor.from_pretrained(
-        "medieval-data/qwen2.5-vl-old-church-slavonic", trust_remote_code=True
+    "nanonets/Nanonets-OCR-s": AutoProcessor.from_pretrained(
+        "nanonets/Nanonets-OCR-s", trust_remote_code=True
     )
 }
 
@@ -79,15 +78,13 @@ assistant_prompt = '<|assistant|>\n'
 prompt_suffix = "<|end|>\n"
 
 @spaces.GPU
-def run_example(image, model_id="Qwen/Qwen2.5-VL-7B-Instruct", run_ner=False, ner_labels=DEFAULT_NER_LABELS):
-    # First get the OCR text
-    text_input = "Convert the image to text."
+def run_example(image:str, model_id:str = "nanonets/Nanonets-OCR-s", prompt: str = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Return the equations in LaTeX representation. If there is an image in the document and image caption is not present, add a small description of the image inside the <img></img> tag; otherwise, add the image caption inside <img></img>. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""):
+
     image_path = array_to_image_path(image)
 
     model = models[model_id]
     processor = processors[model_id]
 
-    prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
     image = Image.fromarray(image).convert("RGB")
     messages = [
         {
@@ -97,7 +94,7 @@ def run_example(image, model_id="Qwen/Qwen2.5-VL-7B-Instruct", run_ner=False, ne
                     "type": "image",
                     "image": image_path,
                 },
-                {"type": "text", "text": text_input},
+                {"type": "text", "text": prompt},
             ],
         }
     ]
@@ -126,46 +123,8 @@ def run_example(image, model_id="Qwen/Qwen2.5-VL-7B-Instruct", run_ner=False, ne
     )
 
     ocr_text = output_text[0]
-
-    # If NER is enabled, process the OCR text
-    if run_ner:
-        ner_results = gliner_model.predict_entities(
-            ocr_text,
-            ner_labels.split(","),
-            threshold=0.3
-        )
-
-        # Create a list of tuples (text, label) for highlighting
-        highlighted_text = []
-        last_end = 0
 
-        # Sort entities by start position
-        sorted_entities = sorted(ner_results, key=lambda x: x["start"])
-
-        # Process each entity and add non-entity text segments
-        for entity in sorted_entities:
-            # Add non-entity text before the current entity
-            if last_end < entity["start"]:
-                highlighted_text.append((ocr_text[last_end:entity["start"]], None))
-
-            # Add the entity text with its label
-            highlighted_text.append((
-                ocr_text[entity["start"]:entity["end"]],
-                entity["label"]
-            ))
-            last_end = entity["end"]
-
-        # Add any remaining text after the last entity
-        if last_end < len(ocr_text):
-            highlighted_text.append((ocr_text[last_end:], None))
-
-        # Create TextWithMetadata instance with the highlighted text and metadata
-        result = TextWithMetadata(highlighted_text, original_text=ocr_text, entities=ner_results)
-        return result, result  # Return twice: once for display, once for state
-
-    # If NER is disabled, return the text without highlighting
-    result = TextWithMetadata([(ocr_text, None)], original_text=ocr_text, entities=[])
-    return result, result  # Return twice: once for display, once for state
+    return ocr_text, ocr_text  # Return twice: once for display, once for state
 
 css = """
 /* Overall app styling */
@@ -235,7 +194,6 @@ with gr.Blocks(css=css) as demo:
     # Add state variables to store OCR results
     ocr_state = gr.State()
 
-    gr.Image("Caracal.jpg", interactive=False)
     with gr.Tab(label="Image Input", elem_classes="tabs"):
         with gr.Row():
             with gr.Column(elem_classes="input-container"):
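For anyone who wants to exercise the newly added model outside the Gradio app, below is a minimal sketch of the updated code path. It mirrors the loading code shown in the diff and otherwise assumes the standard qwen_vl_utils pattern (apply_chat_template, process_vision_info, generate) that app.py's unchanged lines rely on; the file name page.jpg, the shortened prompt, and the max_new_tokens value are placeholders, not values from this commit.

    from transformers import AutoModelForImageTextToText, AutoProcessor
    from qwen_vl_utils import process_vision_info

    # Load the OCR model and processor the same way app.py does.
    model = AutoModelForImageTextToText.from_pretrained(
        "nanonets/Nanonets-OCR-s", trust_remote_code=True, torch_dtype="auto"
    ).cuda().eval()
    processor = AutoProcessor.from_pretrained("nanonets/Nanonets-OCR-s", trust_remote_code=True)

    # One user turn containing the page image and the OCR instruction.
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": "page.jpg"},  # placeholder path
            {"type": "text", "text": "Extract the text from the above document as if you were reading it naturally."},
        ],
    }]

    # Build model inputs with the chat template and the qwen_vl_utils helper.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text], images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt"
    ).to("cuda")

    # Generate and decode only the newly produced tokens.
    generated_ids = model.generate(**inputs, max_new_tokens=1024)
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
    print(processor.batch_decode(trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0])

The full default prompt in run_example additionally asks for HTML tables, LaTeX equations, <watermark> and <page_number> tags, and checkbox symbols; the short instruction above is just enough to reproduce a plain transcription.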