apjanco committed
Commit 87f8a5f · 1 Parent(s): cda0268

change model options

Files changed (3)
  1. Caracal.jpg +0 -0
  2. README.md +3 -3
  3. app.py +9 -51
Caracal.jpg DELETED
Binary file (133 kB)
 
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-title: Caracal
-emoji: 🔥
+title: Fantastic Futures Endpoint
+emoji: 🚀
 colorFrom: indigo
 colorTo: pink
 sdk: gradio
@@ -8,7 +8,7 @@ sdk_version: 5.9.1
 app_file: app.py
 pinned: false
 license: mit
-short_description: A simple app for doing HTR with various models.
+short_description: A simple app for a workshop in December 2025.
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
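For reference, the Space front matter after this commit reads as follows. This is assembled from the context lines and additions in the hunks above; the sdk_version value is taken from the second hunk header and is not itself changed by the commit.

    ---
    title: Fantastic Futures Endpoint
    emoji: 🚀
    colorFrom: indigo
    colorTo: pink
    sdk: gradio
    sdk_version: 5.9.1
    app_file: app.py
    pinned: false
    license: mit
    short_description: A simple app for a workshop in December 2025.
    ---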
app.py CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
 import spaces
-from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, Qwen2_5_VLForConditionalGeneration
+from transformers import AutoModelForImageTextToText, AutoProcessor, Qwen2_5_VLForConditionalGeneration
 from qwen_vl_utils import process_vision_info
 import torch
 from PIL import Image
@@ -53,8 +53,8 @@ models = {
         "Qwen/Qwen2.5-VL-7B-Instruct", trust_remote_code=True, torch_dtype="auto"
     ).cuda().eval(),
 
-    "medieval-data/qwen2.5-vl-old-church-slavonic": Qwen2_5_VLForConditionalGeneration.from_pretrained(
-        "medieval-data/qwen2.5-vl-old-church-slavonic", trust_remote_code=True, torch_dtype="auto"
+    "nanonets/Nanonets-OCR-s": AutoModelForImageTextToText.from_pretrained(
+        "nanonets/Nanonets-OCR-s", trust_remote_code=True, torch_dtype="auto"
     ).cuda().eval()
 }
 
@@ -62,9 +62,8 @@ processors = {
     "Qwen/Qwen2.5-VL-7B-Instruct": AutoProcessor.from_pretrained(
         "Qwen/Qwen2.5-VL-7B-Instruct", trust_remote_code=True
     ),
-
-    "medieval-data/qwen2.5-vl-old-church-slavonic": AutoProcessor.from_pretrained(
-        "medieval-data/qwen2.5-vl-old-church-slavonic", trust_remote_code=True
+    "nanonets/Nanonets-OCR-s": AutoProcessor.from_pretrained(
+        "nanonets/Nanonets-OCR-s", trust_remote_code=True
     )
 }
 
@@ -79,15 +78,13 @@ assistant_prompt = '<|assistant|>\n'
 prompt_suffix = "<|end|>\n"
 
 @spaces.GPU
-def run_example(image, model_id="Qwen/Qwen2.5-VL-7B-Instruct", run_ner=False, ner_labels=DEFAULT_NER_LABELS):
-    # First get the OCR text
-    text_input = "Convert the image to text."
+def run_example(image:str, model_id:str = "nanonets/Nanonets-OCR-s", prompt: str = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Return the equations in LaTeX representation. If there is an image in the document and image caption is not present, add a small description of the image inside the <img></img> tag; otherwise, add the image caption inside <img></img>. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""):
+
     image_path = array_to_image_path(image)
 
     model = models[model_id]
     processor = processors[model_id]
 
-    prompt = f"{user_prompt}<|image_1|>\n{text_input}{prompt_suffix}{assistant_prompt}"
     image = Image.fromarray(image).convert("RGB")
     messages = [
         {
@@ -97,7 +94,7 @@ def run_example(image, model_id="Qwen/Qwen2.5-VL-7B-Instruct", run_ner=False, ne
                     "type": "image",
                     "image": image_path,
                 },
-                {"type": "text", "text": text_input},
+                {"type": "text", "text": prompt},
             ],
         }
     ]
@@ -126,46 +123,8 @@ def run_example(image, model_id="Qwen/Qwen2.5-VL-7B-Instruct", run_ner=False, ne
     )
 
     ocr_text = output_text[0]
-
-    # If NER is enabled, process the OCR text
-    if run_ner:
-        ner_results = gliner_model.predict_entities(
-            ocr_text,
-            ner_labels.split(","),
-            threshold=0.3
-        )
-
-        # Create a list of tuples (text, label) for highlighting
-        highlighted_text = []
-        last_end = 0
 
-        # Sort entities by start position
-        sorted_entities = sorted(ner_results, key=lambda x: x["start"])
-
-        # Process each entity and add non-entity text segments
-        for entity in sorted_entities:
-            # Add non-entity text before the current entity
-            if last_end < entity["start"]:
-                highlighted_text.append((ocr_text[last_end:entity["start"]], None))
-
-            # Add the entity text with its label
-            highlighted_text.append((
-                ocr_text[entity["start"]:entity["end"]],
-                entity["label"]
-            ))
-            last_end = entity["end"]
-
-        # Add any remaining text after the last entity
-        if last_end < len(ocr_text):
-            highlighted_text.append((ocr_text[last_end:], None))
-
-        # Create TextWithMetadata instance with the highlighted text and metadata
-        result = TextWithMetadata(highlighted_text, original_text=ocr_text, entities=ner_results)
-        return result, result  # Return twice: once for display, once for state
-
-    # If NER is disabled, return the text without highlighting
-    result = TextWithMetadata([(ocr_text, None)], original_text=ocr_text, entities=[])
-    return result, result  # Return twice: once for display, once for state
+    return ocr_text, ocr_text  # Return twice: once for display, once for state
 
 css = """
 /* Overall app styling */
@@ -235,7 +194,6 @@ with gr.Blocks(css=css) as demo:
     # Add state variables to store OCR results
     ocr_state = gr.State()
 
-    gr.Image("Caracal.jpg", interactive=False)
     with gr.Tab(label="Image Input", elem_classes="tabs"):
         with gr.Row():
             with gr.Column(elem_classes="input-container"):
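For anyone who wants to exercise the newly added model outside the Gradio app, below is a minimal sketch of the updated code path. It mirrors the loading code shown in the diff and otherwise assumes the standard qwen_vl_utils pattern (apply_chat_template, process_vision_info, generate) that app.py's unchanged lines rely on; the file name page.jpg, the shortened prompt, and the max_new_tokens value are placeholders, not values from this commit.

    from transformers import AutoModelForImageTextToText, AutoProcessor
    from qwen_vl_utils import process_vision_info

    # Load the OCR model and processor the same way app.py does.
    model = AutoModelForImageTextToText.from_pretrained(
        "nanonets/Nanonets-OCR-s", trust_remote_code=True, torch_dtype="auto"
    ).cuda().eval()
    processor = AutoProcessor.from_pretrained("nanonets/Nanonets-OCR-s", trust_remote_code=True)

    # One user turn containing the page image and the OCR instruction.
    messages = [{
        "role": "user",
        "content": [
            {"type": "image", "image": "page.jpg"},  # placeholder path
            {"type": "text", "text": "Extract the text from the above document as if you were reading it naturally."},
        ],
    }]

    # Build model inputs with the chat template and the qwen_vl_utils helper.
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text], images=image_inputs, videos=video_inputs, padding=True, return_tensors="pt"
    ).to("cuda")

    # Generate and decode only the newly produced tokens.
    generated_ids = model.generate(**inputs, max_new_tokens=1024)
    trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, generated_ids)]
    print(processor.batch_decode(trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0])

The full default prompt in run_example additionally asks for HTML tables, LaTeX equations, <watermark> and <page_number> tags, and checkbox symbols; the short instruction above is just enough to reproduce a plain transcription.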