change model options

- Caracal.jpg +0 -0
- README.md +3 -3
- app.py +9 -51
Caracal.jpg
DELETED
Binary file (133 kB)

README.md
CHANGED
@@ -1,6 +1,6 @@
---
-title:
+title: Fantastic Futures Endpoint
-emoji:
+emoji: 🚀
colorFrom: indigo
colorTo: pink
sdk: gradio
@@ -8,7 +8,7 @@ sdk_version: 5.9.1
app_file: app.py
pinned: false
license: mit
-short_description: A simple app for
+short_description: A simple app for a workshop in December 2025.
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
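
Put together, the Space front matter after this commit reads as follows. This is only the two hunks above assembled into one view; sdk_version comes from the hunk context line and no other fields are changed.

---
title: Fantastic Futures Endpoint
emoji: 🚀
colorFrom: indigo
colorTo: pink
sdk: gradio
sdk_version: 5.9.1
app_file: app.py
pinned: false
license: mit
short_description: A simple app for a workshop in December 2025.
---
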
app.py
CHANGED
@@ -1,6 +1,6 @@
import gradio as gr
import spaces
-from transformers import
+from transformers import AutoModelForImageTextToText, AutoProcessor, Qwen2_5_VLForConditionalGeneration
from qwen_vl_utils import process_vision_info
import torch
from PIL import Image
@@ -53,8 +53,8 @@ models = {
        "Qwen/Qwen2.5-VL-7B-Instruct", trust_remote_code=True, torch_dtype="auto"
    ).cuda().eval(),

-    "
-    "
+    "nanonets/Nanonets-OCR-s": AutoModelForImageTextToText.from_pretrained(
+        "nanonets/Nanonets-OCR-s", trust_remote_code=True, torch_dtype="auto"
    ).cuda().eval()
}

@@ -62,9 +62,8 @@ processors = {
    "Qwen/Qwen2.5-VL-7B-Instruct": AutoProcessor.from_pretrained(
        "Qwen/Qwen2.5-VL-7B-Instruct", trust_remote_code=True
    ),
-
-
-        "medieval-data/qwen2.5-vl-old-church-slavonic", trust_remote_code=True
+    "nanonets/Nanonets-OCR-s": AutoProcessor.from_pretrained(
+        "nanonets/Nanonets-OCR-s", trust_remote_code=True
    )
}

@@ -79,15 +78,13 @@ assistant_prompt = '<|assistant|>\n'
prompt_suffix = "<|end|>\n"

@spaces.GPU
-def run_example(image, model_id="
-
-    text_input = "Convert the image to text."
+def run_example(image:str, model_id:str = "nanonets/Nanonets-OCR-s", prompt: str = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Return the equations in LaTeX representation. If there is an image in the document and image caption is not present, add a small description of the image inside the <img></img> tag; otherwise, add the image caption inside <img></img>. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""):
+
    image_path = array_to_image_path(image)

    model = models[model_id]
    processor = processors[model_id]

-    prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
    image = Image.fromarray(image).convert("RGB")
    messages = [
        {
@@ -97,7 +94,7 @@ def run_example(image, model_id="Qwen/Qwen2.5-VL-7B-Instruct", run_ner=False, ne
                    "type": "image",
                    "image": image_path,
                },
-                {"type": "text", "text":
+                {"type": "text", "text": prompt},
            ],
        }
    ]
@@ -126,46 +123,8 @@ def run_example(image, model_id="Qwen/Qwen2.5-VL-7B-Instruct", run_ner=False, ne
    )

    ocr_text = output_text[0]
-
-    # If NER is enabled, process the OCR text
-    if run_ner:
-        ner_results = gliner_model.predict_entities(
-            ocr_text,
-            ner_labels.split(","),
-            threshold=0.3
-        )
-
-        # Create a list of tuples (text, label) for highlighting
-        highlighted_text = []
-        last_end = 0

-
-        sorted_entities = sorted(ner_results, key=lambda x: x["start"])
-
-        # Process each entity and add non-entity text segments
-        for entity in sorted_entities:
-            # Add non-entity text before the current entity
-            if last_end < entity["start"]:
-                highlighted_text.append((ocr_text[last_end:entity["start"]], None))
-
-            # Add the entity text with its label
-            highlighted_text.append((
-                ocr_text[entity["start"]:entity["end"]],
-                entity["label"]
-            ))
-            last_end = entity["end"]
-
-        # Add any remaining text after the last entity
-        if last_end < len(ocr_text):
-            highlighted_text.append((ocr_text[last_end:], None))
-
-        # Create TextWithMetadata instance with the highlighted text and metadata
-        result = TextWithMetadata(highlighted_text, original_text=ocr_text, entities=ner_results)
-        return result, result  # Return twice: once for display, once for state
-
-    # If NER is disabled, return the text without highlighting
-    result = TextWithMetadata([(ocr_text, None)], original_text=ocr_text, entities=[])
-    return result, result  # Return twice: once for display, once for state
+    return ocr_text, ocr_text  # Return twice: once for display, once for state

css = """
/* Overall app styling */
@@ -235,7 +194,6 @@ with gr.Blocks(css=css) as demo:
    # Add state variables to store OCR results
    ocr_state = gr.State()

-    gr.Image("Caracal.jpg", interactive=False)
    with gr.Tab(label="Image Input", elem_classes="tabs"):
        with gr.Row():
            with gr.Column(elem_classes="input-container"):
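
For anyone who wants to try the newly added default model outside the Space, the snippet below is a minimal sketch using the same transformers classes the commit imports. It is not code from app.py: the sample image path, the shortened prompt, and the max_new_tokens value are placeholder assumptions, and it assumes a CUDA GPU and a recent transformers release that provides AutoModelForImageTextToText.

from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor

model_id = "nanonets/Nanonets-OCR-s"

# Load the model and processor the same way the diff does inside the Space.
model = AutoModelForImageTextToText.from_pretrained(
    model_id, trust_remote_code=True, torch_dtype="auto"
).cuda().eval()
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

# Placeholder input image; in the Space this comes from the Gradio image component.
image = Image.open("sample.png").convert("RGB")

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "sample.png"},
            {"type": "text", "text": "Extract the text from the above document as if you were reading it naturally."},
        ],
    }
]

# Render the chat template, then tokenize the text and image together.
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(text=[text], images=[image], padding=True, return_tensors="pt").to(model.device)

# Generate, drop the prompt tokens, and decode only the newly produced tokens.
output_ids = model.generate(**inputs, max_new_tokens=1024)
trimmed = output_ids[:, inputs["input_ids"].shape[1]:]
print(processor.batch_decode(trimmed, skip_special_tokens=True)[0])

Inside the Space the equivalent call sits behind @spaces.GPU in run_example, where the Gradio image array is first converted via array_to_image_path before being placed into the messages list.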