Update app.py
Browse files
app.py
CHANGED
@@ -491,7 +491,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
|
|
491 |
global pdf_cache
|
492 |
|
493 |
if not file_path or not os.path.exists(file_path):
|
494 |
-
return None, "
|
495 |
|
496 |
file_ext = os.path.splitext(file_path)[1].lower()
|
497 |
|
@@ -500,7 +500,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
|
|
500 |
# Load PDF pages
|
501 |
images = load_images_from_pdf(file_path)
|
502 |
if not images:
|
503 |
-
return None, "
|
504 |
|
505 |
pdf_cache.update({
|
506 |
"images": images,
|
@@ -511,7 +511,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
|
|
511 |
"results": []
|
512 |
})
|
513 |
|
514 |
-
return images[0], f"
|
515 |
|
516 |
elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
|
517 |
# Load single image
|
@@ -526,13 +526,13 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
|
|
526 |
"results": []
|
527 |
})
|
528 |
|
529 |
-
return image, "
|
530 |
else:
|
531 |
-
return None, f"
|
532 |
|
533 |
except Exception as e:
|
534 |
print(f"Error loading file: {e}")
|
535 |
-
return None, f"
|
536 |
|
537 |
|
538 |
def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional[Image.Image], Optional[Dict]]:
|
@@ -540,7 +540,7 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
|
|
540 |
global pdf_cache
|
541 |
|
542 |
if not pdf_cache["images"]:
|
543 |
-
return None, '<div class="page-info"
|
544 |
|
545 |
if direction == "prev":
|
546 |
pdf_cache["current_page"] = max(0, pdf_cache["current_page"] - 1)
|
@@ -552,10 +552,10 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
|
|
552 |
|
553 |
index = pdf_cache["current_page"]
|
554 |
current_image_preview = pdf_cache["images"][index]
|
555 |
-
page_info_html = f'<div class="page-info"
|
556 |
|
557 |
# Initialize default result values
|
558 |
-
markdown_content = "
|
559 |
processed_img = None
|
560 |
layout_json = None
|
561 |
|
@@ -565,7 +565,7 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
|
|
565 |
pdf_cache["results"][index]):
|
566 |
|
567 |
result = pdf_cache["results"][index]
|
568 |
-
markdown_content = result.get('markdown_content') or result.get('raw_output', '
|
569 |
processed_img = result.get('processed_image', None) # Get the processed image
|
570 |
layout_json = result.get('layout_result', None) # Get the layout JSON
|
571 |
|
@@ -635,26 +635,12 @@ def create_gradio_interface():
|
|
635 |
}
|
636 |
"""
|
637 |
|
638 |
-
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="
|
639 |
|
640 |
# Header
|
641 |
gr.HTML("""
|
642 |
<div class="title" style="text-align: center">
|
643 |
-
<h1>๐
|
644 |
-
<p style="font-size: 1.1em; color: #6b7280; margin-bottom: 0.6em;">
|
645 |
-
A state-of-the-art image/pdf-to-markdown vision language model for intelligent document processing
|
646 |
-
</p>
|
647 |
-
<div style="display: flex; justify-content: center; gap: 20px; margin: 15px 0;">
|
648 |
-
<a href="https://huggingface.co/rednote-hilab/dots.ocr" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
|
649 |
-
๐ Hugging Face Model
|
650 |
-
</a>
|
651 |
-
<a href="https://github.com/rednote-hilab/dots.ocr/blob/master/assets/blog.md" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
|
652 |
-
๐ Release Blog
|
653 |
-
</a>
|
654 |
-
<a href="https://github.com/rednote-hilab/dots.ocr" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
|
655 |
-
💻 GitHub Repository
|
656 |
-
</a>
|
657 |
-
</div>
|
658 |
</div>
|
659 |
""")
|
660 |
|
@@ -665,14 +651,14 @@ def create_gradio_interface():
|
|
665 |
|
666 |
# File input
|
667 |
file_input = gr.File(
|
668 |
-
label="
|
669 |
file_types=[".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".pdf"],
|
670 |
type="filepath"
|
671 |
)
|
672 |
|
673 |
# Image preview
|
674 |
image_preview = gr.Image(
|
675 |
-
label="
|
676 |
type="pil",
|
677 |
interactive=False,
|
678 |
height=300
|
@@ -680,43 +666,40 @@ def create_gradio_interface():
|
|
680 |
|
681 |
# Page navigation for PDFs
|
682 |
with gr.Row():
|
683 |
-
prev_page_btn = gr.Button("โ
|
684 |
-
page_info = gr.HTML('<div class="page-info"
|
685 |
-
next_page_btn = gr.Button("
|
686 |
|
687 |
# Advanced settings
|
688 |
-
with gr.Accordion("
|
689 |
max_new_tokens = gr.Slider(
|
690 |
minimum=1000,
|
691 |
maximum=32000,
|
692 |
value=24000,
|
693 |
step=1000,
|
694 |
-
label="
|
695 |
-
info="Maximum number of tokens to generate"
|
696 |
)
|
697 |
|
698 |
min_pixels = gr.Number(
|
699 |
value=MIN_PIXELS,
|
700 |
-
label="
|
701 |
-
info="Minimum image resolution"
|
702 |
)
|
703 |
|
704 |
max_pixels = gr.Number(
|
705 |
value=MAX_PIXELS,
|
706 |
-
label="
|
707 |
-
info="Maximum image resolution"
|
708 |
)
|
709 |
|
710 |
# Process button
|
711 |
process_btn = gr.Button(
|
712 |
-
"๐
|
713 |
variant="primary",
|
714 |
elem_classes=["process-button"],
|
715 |
size="lg"
|
716 |
)
|
717 |
|
718 |
# Clear button
|
719 |
-
clear_btn = gr.Button("๐๏ธ
|
720 |
|
721 |
# Right column - Results
|
722 |
with gr.Column(scale=2):
|
@@ -724,23 +707,23 @@ def create_gradio_interface():
|
|
724 |
# Results tabs
|
725 |
with gr.Tabs():
|
726 |
# Processed image tab
|
727 |
-
with gr.Tab("๐ผ๏ธ
|
728 |
processed_image = gr.Image(
|
729 |
-
label="
|
730 |
type="pil",
|
731 |
interactive=False,
|
732 |
height=500
|
733 |
)
|
734 |
# Markdown output tab
|
735 |
-
with gr.Tab("๐
|
736 |
markdown_output = gr.Markdown(
|
737 |
-
value="
|
738 |
height=500
|
739 |
)
|
740 |
# JSON layout tab
|
741 |
-
with gr.Tab("๐
|
742 |
json_output = gr.JSON(
|
743 |
-
label="
|
744 |
value=None
|
745 |
)
|
746 |
|
@@ -751,10 +734,10 @@ def create_gradio_interface():
|
|
751 |
|
752 |
try:
|
753 |
if not file_path:
|
754 |
-
return None, "
|
755 |
|
756 |
if model is None:
|
757 |
-
return None, "
|
758 |
|
759 |
# Load and preview file
|
760 |
image, page_info = load_file_for_preview(file_path)
|
@@ -775,7 +758,7 @@ def create_gradio_interface():
|
|
775 |
)
|
776 |
all_results.append(result)
|
777 |
if result.get('markdown_content'):
|
778 |
-
all_markdown.append(f"##
|
779 |
|
780 |
pdf_cache["results"] = all_results
|
781 |
pdf_cache["is_parsed"] = True
|
@@ -807,7 +790,7 @@ def create_gradio_interface():
|
|
807 |
pdf_cache["is_parsed"] = True
|
808 |
|
809 |
# Check if the content contains mostly Arabic text
|
810 |
-
content = result['markdown_content'] or "
|
811 |
if is_arabic_text(content):
|
812 |
markdown_update = gr.update(value=content, rtl=True)
|
813 |
else:
|
@@ -820,7 +803,7 @@ def create_gradio_interface():
|
|
820 |
)
|
821 |
|
822 |
except Exception as e:
|
823 |
-
error_msg = f"
|
824 |
print(error_msg)
|
825 |
traceback.print_exc()
|
826 |
return None, error_msg, None
|
@@ -828,7 +811,7 @@ def create_gradio_interface():
|
|
828 |
def handle_file_upload(file_path):
|
829 |
"""Handle file upload and show preview"""
|
830 |
if not file_path:
|
831 |
-
return None, "
|
832 |
|
833 |
image, page_info = load_file_for_preview(file_path)
|
834 |
return image, page_info
|
@@ -850,9 +833,9 @@ def create_gradio_interface():
|
|
850 |
return (
|
851 |
None, # file_input
|
852 |
None, # image_preview
|
853 |
-
'<div class="page-info"
|
854 |
None, # processed_image
|
855 |
-
"
|
856 |
None, # json_output
|
857 |
)
|
858 |
|
@@ -901,4 +884,4 @@ if __name__ == "__main__":
|
|
901 |
share=False,
|
902 |
debug=True,
|
903 |
show_error=True
|
904 |
-
)
|
|
|
491 |
global pdf_cache
|
492 |
|
493 |
if not file_path or not os.path.exists(file_path):
|
494 |
+
return None, "파일이 선택되지 않았습니다"
|
495 |
|
496 |
file_ext = os.path.splitext(file_path)[1].lower()
|
497 |
|
|
|
500 |
# Load PDF pages
|
501 |
images = load_images_from_pdf(file_path)
|
502 |
if not images:
|
503 |
+
return None, "PDF 로드 실패"
|
504 |
|
505 |
pdf_cache.update({
|
506 |
"images": images,
|
|
|
511 |
"results": []
|
512 |
})
|
513 |
|
514 |
+
return images[0], f"페이지 1 / {len(images)}"
|
515 |
|
516 |
elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
|
517 |
# Load single image
|
|
|
526 |
"results": []
|
527 |
})
|
528 |
|
529 |
+
return image, "페이지 1 / 1"
|
530 |
else:
|
531 |
+
return None, f"지원되지 않는 파일 형식: {file_ext}"
|
532 |
|
533 |
except Exception as e:
|
534 |
print(f"Error loading file: {e}")
|
535 |
+
return None, f"파일 로드 오류: {str(e)}"
|
536 |
|
537 |
|
538 |
def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional[Image.Image], Optional[Dict]]:
|
|
|
540 |
global pdf_cache
|
541 |
|
542 |
if not pdf_cache["images"]:
|
543 |
+
return None, '<div class="page-info">파일이 로드되지 않았습니다</div>', "아직 결과가 없습니다", None, None
|
544 |
|
545 |
if direction == "prev":
|
546 |
pdf_cache["current_page"] = max(0, pdf_cache["current_page"] - 1)
|
|
|
552 |
|
553 |
index = pdf_cache["current_page"]
|
554 |
current_image_preview = pdf_cache["images"][index]
|
555 |
+
page_info_html = f'<div class="page-info">페이지 {index + 1} / {pdf_cache["total_pages"]}</div>'
|
556 |
|
557 |
# Initialize default result values
|
558 |
+
markdown_content = "페이지가 아직 처리되지 않았습니다"
|
559 |
processed_img = None
|
560 |
layout_json = None
|
561 |
|
|
|
565 |
pdf_cache["results"][index]):
|
566 |
|
567 |
result = pdf_cache["results"][index]
|
568 |
+
markdown_content = result.get('markdown_content') or result.get('raw_output', '사용 가능한 콘텐츠가 없습니다')
|
569 |
processed_img = result.get('processed_image', None) # Get the processed image
|
570 |
layout_json = result.get('layout_result', None) # Get the layout JSON
|
571 |
|
|
|
635 |
}
|
636 |
"""
|
637 |
|
638 |
+
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="VIDraft-NH-OCR") as demo:
|
639 |
|
640 |
# Header
|
641 |
gr.HTML("""
|
642 |
<div class="title" style="text-align: center">
|
643 |
+
<h1>๐ VIDraft-NH-OCR</h1>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
644 |
</div>
|
645 |
""")
|
646 |
|
|
|
651 |
|
652 |
# File input
|
653 |
file_input = gr.File(
|
654 |
+
label="이미지 또는 PDF 업로드",
|
655 |
file_types=[".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".pdf"],
|
656 |
type="filepath"
|
657 |
)
|
658 |
|
659 |
# Image preview
|
660 |
image_preview = gr.Image(
|
661 |
+
label="미리보기",
|
662 |
type="pil",
|
663 |
interactive=False,
|
664 |
height=300
|
|
|
666 |
|
667 |
# Page navigation for PDFs
|
668 |
with gr.Row():
|
669 |
+
prev_page_btn = gr.Button("◀ 이전", size="md")
|
670 |
+
page_info = gr.HTML('<div class="page-info">파일이 로드되지 않았습니다</div>')
|
671 |
+
next_page_btn = gr.Button("다음 ▶", size="md")
|
672 |
|
673 |
# Advanced settings
|
674 |
+
with gr.Accordion("고급 설정", open=False):
|
675 |
max_new_tokens = gr.Slider(
|
676 |
minimum=1000,
|
677 |
maximum=32000,
|
678 |
value=24000,
|
679 |
step=1000,
|
680 |
+
label="최대 토큰 수"
|
|
|
681 |
)
|
682 |
|
683 |
min_pixels = gr.Number(
|
684 |
value=MIN_PIXELS,
|
685 |
+
label="최소 픽셀"
|
|
|
686 |
)
|
687 |
|
688 |
max_pixels = gr.Number(
|
689 |
value=MAX_PIXELS,
|
690 |
+
label="최대 픽셀"
|
|
|
691 |
)
|
692 |
|
693 |
# Process button
|
694 |
process_btn = gr.Button(
|
695 |
+
"๐ ๋ฌธ์ ์ฒ๋ฆฌ",
|
696 |
variant="primary",
|
697 |
elem_classes=["process-button"],
|
698 |
size="lg"
|
699 |
)
|
700 |
|
701 |
# Clear button
|
702 |
+
clear_btn = gr.Button("🗑️ 모두 지우기", variant="secondary")
|
703 |
|
704 |
# Right column - Results
|
705 |
with gr.Column(scale=2):
|
|
|
707 |
# Results tabs
|
708 |
with gr.Tabs():
|
709 |
# Processed image tab
|
710 |
+
with gr.Tab("🖼️ 처리된 이미지"):
|
711 |
processed_image = gr.Image(
|
712 |
+
label="레이아웃 감지 이미지",
|
713 |
type="pil",
|
714 |
interactive=False,
|
715 |
height=500
|
716 |
)
|
717 |
# Markdown output tab
|
718 |
+
with gr.Tab("๐ ์ถ์ถ๋ ์ฝํ
์ธ "):
|
719 |
markdown_output = gr.Markdown(
|
720 |
+
value="'문서 처리'를 클릭하여 콘텐츠를 추출하세요...",
|
721 |
height=500
|
722 |
)
|
723 |
# JSON layout tab
|
724 |
+
with gr.Tab("๐ ๋ ์ด์์ JSON"):
|
725 |
json_output = gr.JSON(
|
726 |
+
label="레이아웃 분석 결과",
|
727 |
value=None
|
728 |
)
|
729 |
|
|
|
734 |
|
735 |
try:
|
736 |
if not file_path:
|
737 |
+
return None, "먼저 파일을 업로드하세요.", None
|
738 |
|
739 |
if model is None:
|
740 |
+
return None, "모델이 로드되지 않았습니다. 페이지를 새로고침하고 다시 시도하세요.", None
|
741 |
|
742 |
# Load and preview file
|
743 |
image, page_info = load_file_for_preview(file_path)
|
|
|
758 |
)
|
759 |
all_results.append(result)
|
760 |
if result.get('markdown_content'):
|
761 |
+
all_markdown.append(f"## 페이지 {i+1}\n\n{result['markdown_content']}")
|
762 |
|
763 |
pdf_cache["results"] = all_results
|
764 |
pdf_cache["is_parsed"] = True
|
|
|
790 |
pdf_cache["is_parsed"] = True
|
791 |
|
792 |
# Check if the content contains mostly Arabic text
|
793 |
+
content = result['markdown_content'] or "추출된 콘텐츠가 없습니다"
|
794 |
if is_arabic_text(content):
|
795 |
markdown_update = gr.update(value=content, rtl=True)
|
796 |
else:
|
|
|
803 |
)
|
804 |
|
805 |
except Exception as e:
|
806 |
+
error_msg = f"문서 처리 오류: {str(e)}"
|
807 |
print(error_msg)
|
808 |
traceback.print_exc()
|
809 |
return None, error_msg, None
|
|
|
811 |
def handle_file_upload(file_path):
|
812 |
"""Handle file upload and show preview"""
|
813 |
if not file_path:
|
814 |
+
return None, "파일이 로드되지 않았습니다"
|
815 |
|
816 |
image, page_info = load_file_for_preview(file_path)
|
817 |
return image, page_info
|
|
|
833 |
return (
|
834 |
None, # file_input
|
835 |
None, # image_preview
|
836 |
+
'<div class="page-info">파일이 로드되지 않았습니다</div>', # page_info
|
837 |
None, # processed_image
|
838 |
+
"'문서 처리'를 클릭하여 콘텐츠를 추출하세요...", # markdown_output
|
839 |
None, # json_output
|
840 |
)
|
841 |
|
|
|
884 |
share=False,
|
885 |
debug=True,
|
886 |
show_error=True
|
887 |
+
)
|