muhammadsalmanalfaridzi committed on
Commit
79a92e5
·
verified ·
1 Parent(s): a8db4d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -13,12 +13,6 @@ from docling.utils.utils import create_hash
13
  import pandas as pd
14
  import time
15
  import datetime
16
- import spaces
17
- import os
18
- # Pastikan CUDA_HOME diatur (meskipun nvcc tidak tersedia)
19
- os.environ["CUDA_HOME"] = "/usr/local/cuda"
20
- # Jika didukung oleh modul, coba paksa fallback custom kernel
21
- os.environ["FORCE_FALLBACK_DEFORMABLE_ATTENTION"] = "1"
22
 
23
  # Set up logging
24
  logging.basicConfig(level=logging.INFO)
@@ -37,6 +31,7 @@ pipeline_options.ocr_options.lang = ["id", "en"] # OCR languages
37
  def export_tables_and_figures(conv_res, output_dir):
38
  """Exports tables, figures, and multimodal pages from the converted document."""
39
  start_time = time.time()
 
40
  output_files = []
41
 
42
  # Export tables
@@ -62,6 +57,8 @@ def export_tables_and_figures(conv_res, output_dir):
62
  picture_image_filename = output_dir / f"{conv_res.input.file.stem}-picture-{picture_ix + 1}.png"
63
  _log.info(f"Saving Picture to {picture_image_filename}")
64
  picture.image.save(picture_image_filename)
 
 
65
  output_files.append(picture_image_filename)
66
  else:
67
  _log.warning(f"Skipping picture {picture_ix + 1} due to missing image.")
@@ -71,12 +68,14 @@ def export_tables_and_figures(conv_res, output_dir):
71
  for content_text, content_md, content_dt, page_cells, page_segments, page in generate_multimodal_pages(conv_res):
72
  try:
73
  dpi = page._default_image_scale * 72
 
74
  image_width = image_height = 0
75
  image_bytes = None
76
  if page.image:
77
  image_width = page.image.width
78
  image_height = page.image.height
79
  image_bytes = page.image.tobytes()
 
80
  rows.append({
81
  "document": conv_res.input.file.name,
82
  "hash": conv_res.input.document_hash,
@@ -106,10 +105,13 @@ def export_tables_and_figures(conv_res, output_dir):
106
  now = datetime.datetime.now()
107
  output_filename = output_dir / f"multimodal_{now:%Y-%m-%d_%H%M%S}.parquet"
108
  df.to_parquet(output_filename)
 
 
109
  output_files.append(output_filename)
110
 
111
  end_time = time.time() - start_time
112
  _log.info(f"Tables, figures, and multimodal pages exported in {end_time:.2f} seconds.")
 
113
  return [str(file.resolve()) for file in output_files]
114
 
115
  # Main conversion function
@@ -134,6 +136,7 @@ def convert_document(input_file):
134
  out_path = output_dir / res.input.file.stem
135
  out_path.mkdir(parents=True, exist_ok=True)
136
 
 
137
  with (out_path / f"{res.input.file.stem}.md").open("w", encoding="utf-8") as fp:
138
  fp.write(res.document.export_to_markdown())
139
  with (out_path / f"{res.input.file.stem}.json").open("w", encoding="utf-8") as fp:
@@ -141,6 +144,7 @@ def convert_document(input_file):
141
  with (out_path / f"{res.input.file.stem}.yaml").open("w", encoding="utf-8") as fp:
142
  fp.write(yaml.safe_dump(res.document.export_to_dict(), allow_unicode=True))
143
 
 
144
  output_files.append(str((out_path / f"{res.input.file.stem}.md").resolve()))
145
  output_files.append(str((out_path / f"{res.input.file.stem}.json").resolve()))
146
  output_files.append(str((out_path / f"{res.input.file.stem}.yaml").resolve()))
@@ -150,8 +154,7 @@ def convert_document(input_file):
150
 
151
  return output_files
152
 
153
- # Wrap the Gradio interface function with the GPU decorator so that CUDA initialization occurs in a GPU-enabled subprocess.
154
- @spaces.GPU(duration=120)
155
  def gradio_interface(input_file):
156
  output_files = convert_document(input_file)
157
  return output_files
@@ -163,8 +166,8 @@ iface = gr.Interface(
163
  outputs=gr.File(file_count="multiple"),
164
  title="Document Conversion with OCR",
165
  description="Upload your document or image, and get the converted output with OCR and other exports.",
166
- flagging_mode="never", # Updated from allow_flagging
167
- theme=themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
168
  )
169
 
170
  if __name__ == "__main__":
 
13
  import pandas as pd
14
  import time
15
  import datetime
 
 
 
 
 
 
16
 
17
  # Set up logging
18
  logging.basicConfig(level=logging.INFO)
 
31
  def export_tables_and_figures(conv_res, output_dir):
32
  """Exports tables, figures, and multimodal pages from the converted document."""
33
  start_time = time.time()
34
+
35
  output_files = []
36
 
37
  # Export tables
 
57
  picture_image_filename = output_dir / f"{conv_res.input.file.stem}-picture-{picture_ix + 1}.png"
58
  _log.info(f"Saving Picture to {picture_image_filename}")
59
  picture.image.save(picture_image_filename)
60
+
61
+ # Append to output files
62
  output_files.append(picture_image_filename)
63
  else:
64
  _log.warning(f"Skipping picture {picture_ix + 1} due to missing image.")
 
68
  for content_text, content_md, content_dt, page_cells, page_segments, page in generate_multimodal_pages(conv_res):
69
  try:
70
  dpi = page._default_image_scale * 72
71
+ # Ensure page.image exists and handle the case where it may be None
72
  image_width = image_height = 0
73
  image_bytes = None
74
  if page.image:
75
  image_width = page.image.width
76
  image_height = page.image.height
77
  image_bytes = page.image.tobytes()
78
+
79
  rows.append({
80
  "document": conv_res.input.file.name,
81
  "hash": conv_res.input.document_hash,
 
105
  now = datetime.datetime.now()
106
  output_filename = output_dir / f"multimodal_{now:%Y-%m-%d_%H%M%S}.parquet"
107
  df.to_parquet(output_filename)
108
+
109
+ # Append to output files
110
  output_files.append(output_filename)
111
 
112
  end_time = time.time() - start_time
113
  _log.info(f"Tables, figures, and multimodal pages exported in {end_time:.2f} seconds.")
114
+
115
  return [str(file.resolve()) for file in output_files]
116
 
117
  # Main conversion function
 
136
  out_path = output_dir / res.input.file.stem
137
  out_path.mkdir(parents=True, exist_ok=True)
138
 
139
+ # Export Markdown and JSON with utf-8 encoding
140
  with (out_path / f"{res.input.file.stem}.md").open("w", encoding="utf-8") as fp:
141
  fp.write(res.document.export_to_markdown())
142
  with (out_path / f"{res.input.file.stem}.json").open("w", encoding="utf-8") as fp:
 
144
  with (out_path / f"{res.input.file.stem}.yaml").open("w", encoding="utf-8") as fp:
145
  fp.write(yaml.safe_dump(res.document.export_to_dict(), allow_unicode=True))
146
 
147
+ # Append to output files
148
  output_files.append(str((out_path / f"{res.input.file.stem}.md").resolve()))
149
  output_files.append(str((out_path / f"{res.input.file.stem}.json").resolve()))
150
  output_files.append(str((out_path / f"{res.input.file.stem}.yaml").resolve()))
 
154
 
155
  return output_files
156
 
157
+ # Create the Gradio interface
 
158
  def gradio_interface(input_file):
159
  output_files = convert_document(input_file)
160
  return output_files
 
166
  outputs=gr.File(file_count="multiple"),
167
  title="Document Conversion with OCR",
168
  description="Upload your document or image, and get the converted output with OCR and other exports.",
169
+ allow_flagging="never",
170
+ theme=themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"), # Set the theme here
171
  )
172
 
173
  if __name__ == "__main__":