Update app.py
Browse files
app.py
CHANGED
@@ -13,12 +13,6 @@ from docling.utils.utils import create_hash
|
|
13 |
import pandas as pd
|
14 |
import time
|
15 |
import datetime
|
16 |
-
import spaces
|
17 |
-
import os
|
18 |
-
# Pastikan CUDA_HOME diatur (meskipun nvcc tidak tersedia)
|
19 |
-
os.environ["CUDA_HOME"] = "/usr/local/cuda"
|
20 |
-
# Jika didukung oleh modul, coba paksa fallback custom kernel
|
21 |
-
os.environ["FORCE_FALLBACK_DEFORMABLE_ATTENTION"] = "1"
|
22 |
|
23 |
# Set up logging
|
24 |
logging.basicConfig(level=logging.INFO)
|
@@ -37,6 +31,7 @@ pipeline_options.ocr_options.lang = ["id", "en"] # OCR languages
|
|
37 |
def export_tables_and_figures(conv_res, output_dir):
|
38 |
"""Exports tables, figures, and multimodal pages from the converted document."""
|
39 |
start_time = time.time()
|
|
|
40 |
output_files = []
|
41 |
|
42 |
# Export tables
|
@@ -62,6 +57,8 @@ def export_tables_and_figures(conv_res, output_dir):
|
|
62 |
picture_image_filename = output_dir / f"{conv_res.input.file.stem}-picture-{picture_ix + 1}.png"
|
63 |
_log.info(f"Saving Picture to {picture_image_filename}")
|
64 |
picture.image.save(picture_image_filename)
|
|
|
|
|
65 |
output_files.append(picture_image_filename)
|
66 |
else:
|
67 |
_log.warning(f"Skipping picture {picture_ix + 1} due to missing image.")
|
@@ -71,12 +68,14 @@ def export_tables_and_figures(conv_res, output_dir):
|
|
71 |
for content_text, content_md, content_dt, page_cells, page_segments, page in generate_multimodal_pages(conv_res):
|
72 |
try:
|
73 |
dpi = page._default_image_scale * 72
|
|
|
74 |
image_width = image_height = 0
|
75 |
image_bytes = None
|
76 |
if page.image:
|
77 |
image_width = page.image.width
|
78 |
image_height = page.image.height
|
79 |
image_bytes = page.image.tobytes()
|
|
|
80 |
rows.append({
|
81 |
"document": conv_res.input.file.name,
|
82 |
"hash": conv_res.input.document_hash,
|
@@ -106,10 +105,13 @@ def export_tables_and_figures(conv_res, output_dir):
|
|
106 |
now = datetime.datetime.now()
|
107 |
output_filename = output_dir / f"multimodal_{now:%Y-%m-%d_%H%M%S}.parquet"
|
108 |
df.to_parquet(output_filename)
|
|
|
|
|
109 |
output_files.append(output_filename)
|
110 |
|
111 |
end_time = time.time() - start_time
|
112 |
_log.info(f"Tables, figures, and multimodal pages exported in {end_time:.2f} seconds.")
|
|
|
113 |
return [str(file.resolve()) for file in output_files]
|
114 |
|
115 |
# Main conversion function
|
@@ -134,6 +136,7 @@ def convert_document(input_file):
|
|
134 |
out_path = output_dir / res.input.file.stem
|
135 |
out_path.mkdir(parents=True, exist_ok=True)
|
136 |
|
|
|
137 |
with (out_path / f"{res.input.file.stem}.md").open("w", encoding="utf-8") as fp:
|
138 |
fp.write(res.document.export_to_markdown())
|
139 |
with (out_path / f"{res.input.file.stem}.json").open("w", encoding="utf-8") as fp:
|
@@ -141,6 +144,7 @@ def convert_document(input_file):
|
|
141 |
with (out_path / f"{res.input.file.stem}.yaml").open("w", encoding="utf-8") as fp:
|
142 |
fp.write(yaml.safe_dump(res.document.export_to_dict(), allow_unicode=True))
|
143 |
|
|
|
144 |
output_files.append(str((out_path / f"{res.input.file.stem}.md").resolve()))
|
145 |
output_files.append(str((out_path / f"{res.input.file.stem}.json").resolve()))
|
146 |
output_files.append(str((out_path / f"{res.input.file.stem}.yaml").resolve()))
|
@@ -150,8 +154,7 @@ def convert_document(input_file):
|
|
150 |
|
151 |
return output_files
|
152 |
|
153 |
-
#
|
154 |
-
@spaces.GPU(duration=120)
|
155 |
def gradio_interface(input_file):
|
156 |
output_files = convert_document(input_file)
|
157 |
return output_files
|
@@ -163,8 +166,8 @@ iface = gr.Interface(
|
|
163 |
outputs=gr.File(file_count="multiple"),
|
164 |
title="Document Conversion with OCR",
|
165 |
description="Upload your document or image, and get the converted output with OCR and other exports.",
|
166 |
-
|
167 |
-
theme=themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
|
168 |
)
|
169 |
|
170 |
if __name__ == "__main__":
|
|
|
13 |
import pandas as pd
|
14 |
import time
|
15 |
import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
# Set up logging
|
18 |
logging.basicConfig(level=logging.INFO)
|
|
|
31 |
def export_tables_and_figures(conv_res, output_dir):
|
32 |
"""Exports tables, figures, and multimodal pages from the converted document."""
|
33 |
start_time = time.time()
|
34 |
+
|
35 |
output_files = []
|
36 |
|
37 |
# Export tables
|
|
|
57 |
picture_image_filename = output_dir / f"{conv_res.input.file.stem}-picture-{picture_ix + 1}.png"
|
58 |
_log.info(f"Saving Picture to {picture_image_filename}")
|
59 |
picture.image.save(picture_image_filename)
|
60 |
+
|
61 |
+
# Append to output files
|
62 |
output_files.append(picture_image_filename)
|
63 |
else:
|
64 |
_log.warning(f"Skipping picture {picture_ix + 1} due to missing image.")
|
|
|
68 |
for content_text, content_md, content_dt, page_cells, page_segments, page in generate_multimodal_pages(conv_res):
|
69 |
try:
|
70 |
dpi = page._default_image_scale * 72
|
71 |
+
# Ensure page.image exists and handle the case where it may be None
|
72 |
image_width = image_height = 0
|
73 |
image_bytes = None
|
74 |
if page.image:
|
75 |
image_width = page.image.width
|
76 |
image_height = page.image.height
|
77 |
image_bytes = page.image.tobytes()
|
78 |
+
|
79 |
rows.append({
|
80 |
"document": conv_res.input.file.name,
|
81 |
"hash": conv_res.input.document_hash,
|
|
|
105 |
now = datetime.datetime.now()
|
106 |
output_filename = output_dir / f"multimodal_{now:%Y-%m-%d_%H%M%S}.parquet"
|
107 |
df.to_parquet(output_filename)
|
108 |
+
|
109 |
+
# Append to output files
|
110 |
output_files.append(output_filename)
|
111 |
|
112 |
end_time = time.time() - start_time
|
113 |
_log.info(f"Tables, figures, and multimodal pages exported in {end_time:.2f} seconds.")
|
114 |
+
|
115 |
return [str(file.resolve()) for file in output_files]
|
116 |
|
117 |
# Main conversion function
|
|
|
136 |
out_path = output_dir / res.input.file.stem
|
137 |
out_path.mkdir(parents=True, exist_ok=True)
|
138 |
|
139 |
+
# Export Markdown, JSON, and YAML with UTF-8 encoding
|
140 |
with (out_path / f"{res.input.file.stem}.md").open("w", encoding="utf-8") as fp:
|
141 |
fp.write(res.document.export_to_markdown())
|
142 |
with (out_path / f"{res.input.file.stem}.json").open("w", encoding="utf-8") as fp:
|
|
|
144 |
with (out_path / f"{res.input.file.stem}.yaml").open("w", encoding="utf-8") as fp:
|
145 |
fp.write(yaml.safe_dump(res.document.export_to_dict(), allow_unicode=True))
|
146 |
|
147 |
+
# Append to output files
|
148 |
output_files.append(str((out_path / f"{res.input.file.stem}.md").resolve()))
|
149 |
output_files.append(str((out_path / f"{res.input.file.stem}.json").resolve()))
|
150 |
output_files.append(str((out_path / f"{res.input.file.stem}.yaml").resolve()))
|
|
|
154 |
|
155 |
return output_files
|
156 |
|
157 |
+
# Gradio handler: convert the uploaded file and return the output file paths
|
|
|
158 |
def gradio_interface(input_file):
|
159 |
output_files = convert_document(input_file)
|
160 |
return output_files
|
|
|
166 |
outputs=gr.File(file_count="multiple"),
|
167 |
title="Document Conversion with OCR",
|
168 |
description="Upload your document or image, and get the converted output with OCR and other exports.",
|
169 |
+
allow_flagging="never",
|
170 |
+
theme=themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"), # Set the theme here
|
171 |
)
|
172 |
|
173 |
if __name__ == "__main__":
|