openfree committed on
Commit
1dd7171
·
verified ·
1 Parent(s): 5d256ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -55
app.py CHANGED
@@ -491,7 +491,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
491
  global pdf_cache
492
 
493
  if not file_path or not os.path.exists(file_path):
494
- return None, "No file selected"
495
 
496
  file_ext = os.path.splitext(file_path)[1].lower()
497
 
@@ -500,7 +500,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
500
  # Load PDF pages
501
  images = load_images_from_pdf(file_path)
502
  if not images:
503
- return None, "Failed to load PDF"
504
 
505
  pdf_cache.update({
506
  "images": images,
@@ -511,7 +511,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
511
  "results": []
512
  })
513
 
514
- return images[0], f"Page 1 / {len(images)}"
515
 
516
  elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
517
  # Load single image
@@ -526,13 +526,13 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
526
  "results": []
527
  })
528
 
529
- return image, "Page 1 / 1"
530
  else:
531
- return None, f"Unsupported file format: {file_ext}"
532
 
533
  except Exception as e:
534
  print(f"Error loading file: {e}")
535
- return None, f"Error loading file: {str(e)}"
536
 
537
 
538
  def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional[Image.Image], Optional[Dict]]:
@@ -540,7 +540,7 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
540
  global pdf_cache
541
 
542
  if not pdf_cache["images"]:
543
- return None, '<div class="page-info">No file loaded</div>', "No results yet", None, None
544
 
545
  if direction == "prev":
546
  pdf_cache["current_page"] = max(0, pdf_cache["current_page"] - 1)
@@ -552,10 +552,10 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
552
 
553
  index = pdf_cache["current_page"]
554
  current_image_preview = pdf_cache["images"][index]
555
- page_info_html = f'<div class="page-info">Page {index + 1} / {pdf_cache["total_pages"]}</div>'
556
 
557
  # Initialize default result values
558
- markdown_content = "Page not processed yet"
559
  processed_img = None
560
  layout_json = None
561
 
@@ -565,7 +565,7 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
565
  pdf_cache["results"][index]):
566
 
567
  result = pdf_cache["results"][index]
568
- markdown_content = result.get('markdown_content') or result.get('raw_output', 'No content available')
569
  processed_img = result.get('processed_image', None) # Get the processed image
570
  layout_json = result.get('layout_result', None) # Get the layout JSON
571
 
@@ -635,26 +635,12 @@ def create_gradio_interface():
635
  }
636
  """
637
 
638
- with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Dots.OCR Demo") as demo:
639
 
640
  # Header
641
  gr.HTML("""
642
  <div class="title" style="text-align: center">
643
- <h1>๐Ÿ” Dot-OCR - Multilingual Document Text Extraction</h1>
644
- <p style="font-size: 1.1em; color: #6b7280; margin-bottom: 0.6em;">
645
- A state-of-the-art image/pdf-to-markdown vision language model for intelligent document processing
646
- </p>
647
- <div style="display: flex; justify-content: center; gap: 20px; margin: 15px 0;">
648
- <a href="https://huggingface.co/rednote-hilab/dots.ocr" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
649
- ๐Ÿ“š Hugging Face Model
650
- </a>
651
- <a href="https://github.com/rednote-hilab/dots.ocr/blob/master/assets/blog.md" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
652
- ๐Ÿ“ Release Blog
653
- </a>
654
- <a href="https://github.com/rednote-hilab/dots.ocr" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
655
- ๐Ÿ’ป GitHub Repository
656
- </a>
657
- </div>
658
  </div>
659
  """)
660
 
@@ -665,14 +651,14 @@ def create_gradio_interface():
665
 
666
  # File input
667
  file_input = gr.File(
668
- label="Upload Image or PDF",
669
  file_types=[".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".pdf"],
670
  type="filepath"
671
  )
672
 
673
  # Image preview
674
  image_preview = gr.Image(
675
- label="Preview",
676
  type="pil",
677
  interactive=False,
678
  height=300
@@ -680,43 +666,40 @@ def create_gradio_interface():
680
 
681
  # Page navigation for PDFs
682
  with gr.Row():
683
- prev_page_btn = gr.Button("โ—€ Previous", size="md")
684
- page_info = gr.HTML('<div class="page-info">No file loaded</div>')
685
- next_page_btn = gr.Button("Next โ–ถ", size="md")
686
 
687
  # Advanced settings
688
- with gr.Accordion("Advanced Settings", open=False):
689
  max_new_tokens = gr.Slider(
690
  minimum=1000,
691
  maximum=32000,
692
  value=24000,
693
  step=1000,
694
- label="Max New Tokens",
695
- info="Maximum number of tokens to generate"
696
  )
697
 
698
  min_pixels = gr.Number(
699
  value=MIN_PIXELS,
700
- label="Min Pixels",
701
- info="Minimum image resolution"
702
  )
703
 
704
  max_pixels = gr.Number(
705
  value=MAX_PIXELS,
706
- label="Max Pixels",
707
- info="Maximum image resolution"
708
  )
709
 
710
  # Process button
711
  process_btn = gr.Button(
712
- "๐Ÿš€ Process Document",
713
  variant="primary",
714
  elem_classes=["process-button"],
715
  size="lg"
716
  )
717
 
718
  # Clear button
719
- clear_btn = gr.Button("๐Ÿ—‘๏ธ Clear All", variant="secondary")
720
 
721
  # Right column - Results
722
  with gr.Column(scale=2):
@@ -724,23 +707,23 @@ def create_gradio_interface():
724
  # Results tabs
725
  with gr.Tabs():
726
  # Processed image tab
727
- with gr.Tab("๐Ÿ–ผ๏ธ Processed Image"):
728
  processed_image = gr.Image(
729
- label="Image with Layout Detection",
730
  type="pil",
731
  interactive=False,
732
  height=500
733
  )
734
  # Markdown output tab
735
- with gr.Tab("๐Ÿ“ Extracted Content"):
736
  markdown_output = gr.Markdown(
737
- value="Click 'Process Document' to see extracted content...",
738
  height=500
739
  )
740
  # JSON layout tab
741
- with gr.Tab("๐Ÿ“‹ Layout JSON"):
742
  json_output = gr.JSON(
743
- label="Layout Analysis Results",
744
  value=None
745
  )
746
 
@@ -751,10 +734,10 @@ def create_gradio_interface():
751
 
752
  try:
753
  if not file_path:
754
- return None, "Please upload a file first.", None
755
 
756
  if model is None:
757
- return None, "Model not loaded. Please refresh the page and try again.", None
758
 
759
  # Load and preview file
760
  image, page_info = load_file_for_preview(file_path)
@@ -775,7 +758,7 @@ def create_gradio_interface():
775
  )
776
  all_results.append(result)
777
  if result.get('markdown_content'):
778
- all_markdown.append(f"## Page {i+1}\n\n{result['markdown_content']}")
779
 
780
  pdf_cache["results"] = all_results
781
  pdf_cache["is_parsed"] = True
@@ -807,7 +790,7 @@ def create_gradio_interface():
807
  pdf_cache["is_parsed"] = True
808
 
809
  # Check if the content contains mostly Arabic text
810
- content = result['markdown_content'] or "No content extracted"
811
  if is_arabic_text(content):
812
  markdown_update = gr.update(value=content, rtl=True)
813
  else:
@@ -820,7 +803,7 @@ def create_gradio_interface():
820
  )
821
 
822
  except Exception as e:
823
- error_msg = f"Error processing document: {str(e)}"
824
  print(error_msg)
825
  traceback.print_exc()
826
  return None, error_msg, None
@@ -828,7 +811,7 @@ def create_gradio_interface():
828
  def handle_file_upload(file_path):
829
  """Handle file upload and show preview"""
830
  if not file_path:
831
- return None, "No file loaded"
832
 
833
  image, page_info = load_file_for_preview(file_path)
834
  return image, page_info
@@ -850,9 +833,9 @@ def create_gradio_interface():
850
  return (
851
  None, # file_input
852
  None, # image_preview
853
- '<div class="page-info">No file loaded</div>', # page_info
854
  None, # processed_image
855
- "Click 'Process Document' to see extracted content...", # markdown_output
856
  None, # json_output
857
  )
858
 
@@ -901,4 +884,4 @@ if __name__ == "__main__":
901
  share=False,
902
  debug=True,
903
  show_error=True
904
- )
 
491
  global pdf_cache
492
 
493
  if not file_path or not os.path.exists(file_path):
494
+ return None, "ํŒŒ์ผ์ด ์„ ํƒ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค"
495
 
496
  file_ext = os.path.splitext(file_path)[1].lower()
497
 
 
500
  # Load PDF pages
501
  images = load_images_from_pdf(file_path)
502
  if not images:
503
+ return None, "PDF ๋กœ๋“œ ์‹คํŒจ"
504
 
505
  pdf_cache.update({
506
  "images": images,
 
511
  "results": []
512
  })
513
 
514
+ return images[0], f"ํŽ˜์ด์ง€ 1 / {len(images)}"
515
 
516
  elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
517
  # Load single image
 
526
  "results": []
527
  })
528
 
529
+ return image, "ํŽ˜์ด์ง€ 1 / 1"
530
  else:
531
+ return None, f"์ง€์›๋˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹: {file_ext}"
532
 
533
  except Exception as e:
534
  print(f"Error loading file: {e}")
535
+ return None, f"ํŒŒ์ผ ๋กœ๋“œ ์˜ค๋ฅ˜: {str(e)}"
536
 
537
 
538
  def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional[Image.Image], Optional[Dict]]:
 
540
  global pdf_cache
541
 
542
  if not pdf_cache["images"]:
543
+ return None, '<div class="page-info">ํŒŒ์ผ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค</div>', "์•„์ง ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค", None, None
544
 
545
  if direction == "prev":
546
  pdf_cache["current_page"] = max(0, pdf_cache["current_page"] - 1)
 
552
 
553
  index = pdf_cache["current_page"]
554
  current_image_preview = pdf_cache["images"][index]
555
+ page_info_html = f'<div class="page-info">ํŽ˜์ด์ง€ {index + 1} / {pdf_cache["total_pages"]}</div>'
556
 
557
  # Initialize default result values
558
+ markdown_content = "ํŽ˜์ด์ง€๊ฐ€ ์•„์ง ์ฒ˜๋ฆฌ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค"
559
  processed_img = None
560
  layout_json = None
561
 
 
565
  pdf_cache["results"][index]):
566
 
567
  result = pdf_cache["results"][index]
568
+ markdown_content = result.get('markdown_content') or result.get('raw_output', '์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ์ฝ˜ํ…์ธ ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค')
569
  processed_img = result.get('processed_image', None) # Get the processed image
570
  layout_json = result.get('layout_result', None) # Get the layout JSON
571
 
 
635
  }
636
  """
637
 
638
+ with gr.Blocks(theme=gr.themes.Soft(), css=css, title="VIDraft-NH-OCR") as demo:
639
 
640
  # Header
641
  gr.HTML("""
642
  <div class="title" style="text-align: center">
643
+ <h1>๐Ÿ” VIDraft-NH-OCR</h1>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
644
  </div>
645
  """)
646
 
 
651
 
652
  # File input
653
  file_input = gr.File(
654
+ label="์ด๋ฏธ์ง€ ๋˜๋Š” PDF ์—…๋กœ๋“œ",
655
  file_types=[".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".pdf"],
656
  type="filepath"
657
  )
658
 
659
  # Image preview
660
  image_preview = gr.Image(
661
+ label="๋ฏธ๋ฆฌ๋ณด๊ธฐ",
662
  type="pil",
663
  interactive=False,
664
  height=300
 
666
 
667
  # Page navigation for PDFs
668
  with gr.Row():
669
+ prev_page_btn = gr.Button("โ—€ ์ด์ „", size="md")
670
+ page_info = gr.HTML('<div class="page-info">ํŒŒ์ผ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค</div>')
671
+ next_page_btn = gr.Button("๋‹ค์Œ โ–ถ", size="md")
672
 
673
  # Advanced settings
674
+ with gr.Accordion("๊ณ ๊ธ‰ ์„ค์ •", open=False):
675
  max_new_tokens = gr.Slider(
676
  minimum=1000,
677
  maximum=32000,
678
  value=24000,
679
  step=1000,
680
+ label="์ตœ๋Œ€ ํ† ํฐ ์ˆ˜"
 
681
  )
682
 
683
  min_pixels = gr.Number(
684
  value=MIN_PIXELS,
685
+ label="์ตœ์†Œ ํ”ฝ์…€"
 
686
  )
687
 
688
  max_pixels = gr.Number(
689
  value=MAX_PIXELS,
690
+ label="์ตœ๋Œ€ ํ”ฝ์…€"
 
691
  )
692
 
693
  # Process button
694
  process_btn = gr.Button(
695
+ "๐Ÿš€ ๋ฌธ์„œ ์ฒ˜๋ฆฌ",
696
  variant="primary",
697
  elem_classes=["process-button"],
698
  size="lg"
699
  )
700
 
701
  # Clear button
702
+ clear_btn = gr.Button("๐Ÿ—‘๏ธ ๋ชจ๋‘ ์ง€์šฐ๊ธฐ", variant="secondary")
703
 
704
  # Right column - Results
705
  with gr.Column(scale=2):
 
707
  # Results tabs
708
  with gr.Tabs():
709
  # Processed image tab
710
+ with gr.Tab("๐Ÿ–ผ๏ธ ์ฒ˜๋ฆฌ๋œ ์ด๋ฏธ์ง€"):
711
  processed_image = gr.Image(
712
+ label="๋ ˆ์ด์•„์›ƒ ๊ฐ์ง€ ์ด๋ฏธ์ง€",
713
  type="pil",
714
  interactive=False,
715
  height=500
716
  )
717
  # Markdown output tab
718
+ with gr.Tab("๐Ÿ“ ์ถ”์ถœ๋œ ์ฝ˜ํ…์ธ "):
719
  markdown_output = gr.Markdown(
720
+ value="'๋ฌธ์„œ ์ฒ˜๋ฆฌ'๋ฅผ ํด๋ฆญํ•˜์—ฌ ์ฝ˜ํ…์ธ ๋ฅผ ์ถ”์ถœํ•˜์„ธ์š”...",
721
  height=500
722
  )
723
  # JSON layout tab
724
+ with gr.Tab("๐Ÿ“‹ ๋ ˆ์ด์•„์›ƒ JSON"):
725
  json_output = gr.JSON(
726
+ label="๋ ˆ์ด์•„์›ƒ ๋ถ„์„ ๊ฒฐ๊ณผ",
727
  value=None
728
  )
729
 
 
734
 
735
  try:
736
  if not file_path:
737
+ return None, "๋จผ์ € ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š”.", None
738
 
739
  if model is None:
740
+ return None, "๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ํŽ˜์ด์ง€๋ฅผ ์ƒˆ๋กœ๊ณ ์นจํ•˜๊ณ  ๋‹ค์‹œ ์‹œ๋„ํ•˜์„ธ์š”.", None
741
 
742
  # Load and preview file
743
  image, page_info = load_file_for_preview(file_path)
 
758
  )
759
  all_results.append(result)
760
  if result.get('markdown_content'):
761
+ all_markdown.append(f"## ํŽ˜์ด์ง€ {i+1}\n\n{result['markdown_content']}")
762
 
763
  pdf_cache["results"] = all_results
764
  pdf_cache["is_parsed"] = True
 
790
  pdf_cache["is_parsed"] = True
791
 
792
  # Check if the content contains mostly Arabic text
793
+ content = result['markdown_content'] or "์ถ”์ถœ๋œ ์ฝ˜ํ…์ธ ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค"
794
  if is_arabic_text(content):
795
  markdown_update = gr.update(value=content, rtl=True)
796
  else:
 
803
  )
804
 
805
  except Exception as e:
806
+ error_msg = f"๋ฌธ์„œ ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {str(e)}"
807
  print(error_msg)
808
  traceback.print_exc()
809
  return None, error_msg, None
 
811
  def handle_file_upload(file_path):
812
  """Handle file upload and show preview"""
813
  if not file_path:
814
+ return None, "ํŒŒ์ผ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค"
815
 
816
  image, page_info = load_file_for_preview(file_path)
817
  return image, page_info
 
833
  return (
834
  None, # file_input
835
  None, # image_preview
836
+ '<div class="page-info">ํŒŒ์ผ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค</div>', # page_info
837
  None, # processed_image
838
+ "'๋ฌธ์„œ ์ฒ˜๋ฆฌ'๋ฅผ ํด๋ฆญํ•˜์—ฌ ์ฝ˜ํ…์ธ ๋ฅผ ์ถ”์ถœํ•˜์„ธ์š”...", # markdown_output
839
  None, # json_output
840
  )
841
 
 
884
  share=False,
885
  debug=True,
886
  show_error=True
887
+ )