Dots-OCR

Runtime error

App Files Files Community

openfree committed on Aug 7

Commit

1dd7171

verified ·

1 Parent(s): 5d256ae

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -55

app.py CHANGED Viewed

@@ -491,7 +491,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
     global pdf_cache
     if not file_path or not os.path.exists(file_path):
-        return None, "No file selected"
     file_ext = os.path.splitext(file_path)[1].lower()
@@ -500,7 +500,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
             # Load PDF pages
             images = load_images_from_pdf(file_path)
             if not images:
-                return None, "Failed to load PDF"
             pdf_cache.update({
                 "images": images,
@@ -511,7 +511,7 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
                 "results": []
             })
-            return images[0], f"Page 1 / {len(images)}"
         elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
             # Load single image
@@ -526,13 +526,13 @@ def load_file_for_preview(file_path: str) -> Tuple[Optional[Image.Image], str]:
                 "results": []
             })
-            return image, "Page 1 / 1"
         else:
-            return None, f"Unsupported file format: {file_ext}"
     except Exception as e:
         print(f"Error loading file: {e}")
-        return None, f"Error loading file: {str(e)}"
 def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional[Image.Image], Optional[Dict]]:
@@ -540,7 +540,7 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
     global pdf_cache
     if not pdf_cache["images"]:
-        return None, '<div class="page-info">No file loaded</div>', "No results yet", None, None
     if direction == "prev":
         pdf_cache["current_page"] = max(0, pdf_cache["current_page"] - 1)
@@ -552,10 +552,10 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
     index = pdf_cache["current_page"]
     current_image_preview = pdf_cache["images"][index]
-    page_info_html = f'<div class="page-info">Page {index + 1} / {pdf_cache["total_pages"]}</div>'
     # Initialize default result values
-    markdown_content = "Page not processed yet"
     processed_img = None
     layout_json = None
@@ -565,7 +565,7 @@ def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional
         pdf_cache["results"][index]):
         result = pdf_cache["results"][index]
-        markdown_content = result.get('markdown_content') or result.get('raw_output', 'No content available')
         processed_img = result.get('processed_image', None) # Get the processed image
         layout_json = result.get('layout_result', None) # Get the layout JSON
@@ -635,26 +635,12 @@ def create_gradio_interface():
     }
     """
-    with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Dots.OCR Demo") as demo:
         # Header
         gr.HTML("""
         <div class="title" style="text-align: center">
-            <h1>🔍 Dot-OCR - Multilingual Document Text Extraction</h1>
-            <p style="font-size: 1.1em; color: #6b7280; margin-bottom: 0.6em;">
-                A state-of-the-art image/pdf-to-markdown vision language model for intelligent document processing
-            </p>
-            <div style="display: flex; justify-content: center; gap: 20px; margin: 15px 0;">
-                <a href="https://huggingface.co/rednote-hilab/dots.ocr" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
-                    📚 Hugging Face Model
-                </a>
-                <a href="https://github.com/rednote-hilab/dots.ocr/blob/master/assets/blog.md" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
-                    📝 Release Blog
-                </a>
-                <a href="https://github.com/rednote-hilab/dots.ocr" target="_blank" style="text-decoration: none; color: #2563eb; font-weight: 500;">
-                    💻 GitHub Repository
-                </a>
-            </div>
         </div>
         """)
@@ -665,14 +651,14 @@ def create_gradio_interface():
                 # File input
                 file_input = gr.File(
-                    label="Upload Image or PDF",
                     file_types=[".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".pdf"],
                     type="filepath"
                 )
                 # Image preview
                 image_preview = gr.Image(
-                    label="Preview",
                     type="pil",
                     interactive=False,
                     height=300
@@ -680,43 +666,40 @@ def create_gradio_interface():
                 # Page navigation for PDFs
                 with gr.Row():
-                    prev_page_btn = gr.Button("◀ Previous", size="md")
-                    page_info = gr.HTML('<div class="page-info">No file loaded</div>')
-                    next_page_btn = gr.Button("Next ▶", size="md")
                 # Advanced settings
-                with gr.Accordion("Advanced Settings", open=False):
                     max_new_tokens = gr.Slider(
                         minimum=1000,
                         maximum=32000,
                         value=24000,
                         step=1000,
-                        label="Max New Tokens",
-                        info="Maximum number of tokens to generate"
                     )
                     min_pixels = gr.Number(
                         value=MIN_PIXELS,
-                        label="Min Pixels",
-                        info="Minimum image resolution"
                     )
                     max_pixels = gr.Number(
                         value=MAX_PIXELS,
-                        label="Max Pixels",
-                        info="Maximum image resolution"
                     )
                 # Process button
                 process_btn = gr.Button(
-                    "🚀 Process Document",
                     variant="primary",
                     elem_classes=["process-button"],
                     size="lg"
                 )
                 # Clear button
-                clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
             # Right column - Results
             with gr.Column(scale=2):
@@ -724,23 +707,23 @@ def create_gradio_interface():
                 # Results tabs
                 with gr.Tabs():
                     # Processed image tab
-                    with gr.Tab("🖼️ Processed Image"):
                         processed_image = gr.Image(
-                            label="Image with Layout Detection",
                             type="pil",
                             interactive=False,
                             height=500
                         )
                     # Markdown output tab
-                    with gr.Tab("📝 Extracted Content"):
                         markdown_output = gr.Markdown(
-                            value="Click 'Process Document' to see extracted content...",
                             height=500
                         )
                     # JSON layout tab
-                    with gr.Tab("📋 Layout JSON"):
                         json_output = gr.JSON(
-                            label="Layout Analysis Results",
                             value=None
                         )
@@ -751,10 +734,10 @@ def create_gradio_interface():
             try:
                 if not file_path:
-                    return None, "Please upload a file first.", None
                 if model is None:
-                    return None, "Model not loaded. Please refresh the page and try again.", None
                 # Load and preview file
                 image, page_info = load_file_for_preview(file_path)
@@ -775,7 +758,7 @@ def create_gradio_interface():
                         )
                         all_results.append(result)
                         if result.get('markdown_content'):
-                            all_markdown.append(f"## Page {i+1}\n\n{result['markdown_content']}")
                     pdf_cache["results"] = all_results
                     pdf_cache["is_parsed"] = True
@@ -807,7 +790,7 @@ def create_gradio_interface():
                     pdf_cache["is_parsed"] = True
                     # Check if the content contains mostly Arabic text
-                    content = result['markdown_content'] or "No content extracted"
                     if is_arabic_text(content):
                         markdown_update = gr.update(value=content, rtl=True)
                     else:
@@ -820,7 +803,7 @@ def create_gradio_interface():
                     )
             except Exception as e:
-                error_msg = f"Error processing document: {str(e)}"
                 print(error_msg)
                 traceback.print_exc()
                 return None, error_msg, None
@@ -828,7 +811,7 @@ def create_gradio_interface():
         def handle_file_upload(file_path):
             """Handle file upload and show preview"""
             if not file_path:
-                return None, "No file loaded"
             image, page_info = load_file_for_preview(file_path)
             return image, page_info
@@ -850,9 +833,9 @@ def create_gradio_interface():
             return (
                 None,  # file_input
                 None,  # image_preview
-                '<div class="page-info">No file loaded</div>',  # page_info
                 None,  # processed_image
-                "Click 'Process Document' to see extracted content...",  # markdown_output
                 None,  # json_output
             )
@@ -901,4 +884,4 @@ if __name__ == "__main__":
         share=False,
         debug=True,
         show_error=True
-    )

     global pdf_cache
     if not file_path or not os.path.exists(file_path):
+        return None, "파일이 선택되지 않았습니다"
     file_ext = os.path.splitext(file_path)[1].lower()
             # Load PDF pages
             images = load_images_from_pdf(file_path)
             if not images:
+                return None, "PDF 로드 실패"
             pdf_cache.update({
                 "images": images,
                 "results": []
             })
+            return images[0], f"페이지 1 / {len(images)}"
         elif file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
             # Load single image
                 "results": []
             })
+            return image, "페이지 1 / 1"
         else:
+            return None, f"지원되지 않는 파일 형식: {file_ext}"
     except Exception as e:
         print(f"Error loading file: {e}")
+        return None, f"파일 로드 오류: {str(e)}"
 def turn_page(direction: str) -> Tuple[Optional[Image.Image], str, Any, Optional[Image.Image], Optional[Dict]]:
     global pdf_cache
     if not pdf_cache["images"]:
+        return None, '<div class="page-info">파일이 로드되지 않았습니다</div>', "아직 결과가 없습니다", None, None
     if direction == "prev":
         pdf_cache["current_page"] = max(0, pdf_cache["current_page"] - 1)
     index = pdf_cache["current_page"]
     current_image_preview = pdf_cache["images"][index]
+    page_info_html = f'<div class="page-info">페이지 {index + 1} / {pdf_cache["total_pages"]}</div>'
     # Initialize default result values
+    markdown_content = "페이지가 아직 처리되지 않았습니다"
     processed_img = None
     layout_json = None
         pdf_cache["results"][index]):
         result = pdf_cache["results"][index]
+        markdown_content = result.get('markdown_content') or result.get('raw_output', '사용 가능한 콘텐츠가 없습니다')
         processed_img = result.get('processed_image', None) # Get the processed image
         layout_json = result.get('layout_result', None) # Get the layout JSON
     }
     """
+    with gr.Blocks(theme=gr.themes.Soft(), css=css, title="VIDraft-NH-OCR") as demo:
         # Header
         gr.HTML("""
         <div class="title" style="text-align: center">
+            <h1>🔍 VIDraft-NH-OCR</h1>
         </div>
         """)
                 # File input
                 file_input = gr.File(
+                    label="이미지 또는 PDF 업로드",
                     file_types=[".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".pdf"],
                     type="filepath"
                 )
                 # Image preview
                 image_preview = gr.Image(
+                    label="미리보기",
                     type="pil",
                     interactive=False,
                     height=300
                 # Page navigation for PDFs
                 with gr.Row():
+                    prev_page_btn = gr.Button("◀ 이전", size="md")
+                    page_info = gr.HTML('<div class="page-info">파일이 로드되지 않았습니다</div>')
+                    next_page_btn = gr.Button("다음 ▶", size="md")
                 # Advanced settings
+                with gr.Accordion("고급 설정", open=False):
                     max_new_tokens = gr.Slider(
                         minimum=1000,
                         maximum=32000,
                         value=24000,
                         step=1000,
+                        label="최대 토큰 수"
                     )
                     min_pixels = gr.Number(
                         value=MIN_PIXELS,
+                        label="최소 픽셀"
                     )
                     max_pixels = gr.Number(
                         value=MAX_PIXELS,
+                        label="최대 픽셀"
                     )
                 # Process button
                 process_btn = gr.Button(
+                    "🚀 문서 처리",
                     variant="primary",
                     elem_classes=["process-button"],
                     size="lg"
                 )
                 # Clear button
+                clear_btn = gr.Button("🗑️ 모두 지우기", variant="secondary")
             # Right column - Results
             with gr.Column(scale=2):
                 # Results tabs
                 with gr.Tabs():
                     # Processed image tab
+                    with gr.Tab("🖼️ 처리된 이미지"):
                         processed_image = gr.Image(
+                            label="레이아웃 감지 이미지",
                             type="pil",
                             interactive=False,
                             height=500
                         )
                     # Markdown output tab
+                    with gr.Tab("📝 추출된 콘텐츠"):
                         markdown_output = gr.Markdown(
+                            value="'문서 처리'를 클릭하여 콘텐츠를 추출하세요...",
                             height=500
                         )
                     # JSON layout tab
+                    with gr.Tab("📋 레이아웃 JSON"):
                         json_output = gr.JSON(
+                            label="레이아웃 분석 결과",
                             value=None
                         )
             try:
                 if not file_path:
+                    return None, "먼저 파일을 업로드하세요.", None
                 if model is None:
+                    return None, "모델이 로드되지 않았습니다. 페이지를 새로고침하고 다시 시도하세요.", None
                 # Load and preview file
                 image, page_info = load_file_for_preview(file_path)
                         )
                         all_results.append(result)
                         if result.get('markdown_content'):
+                            all_markdown.append(f"## 페이지 {i+1}\n\n{result['markdown_content']}")
                     pdf_cache["results"] = all_results
                     pdf_cache["is_parsed"] = True
                     pdf_cache["is_parsed"] = True
                     # Check if the content contains mostly Arabic text
+                    content = result['markdown_content'] or "추출된 콘텐츠가 없습니다"
                     if is_arabic_text(content):
                         markdown_update = gr.update(value=content, rtl=True)
                     else:
                     )
             except Exception as e:
+                error_msg = f"문서 처리 오류: {str(e)}"
                 print(error_msg)
                 traceback.print_exc()
                 return None, error_msg, None
         def handle_file_upload(file_path):
             """Handle file upload and show preview"""
             if not file_path:
+                return None, "파일이 로드되지 않았습니다"
             image, page_info = load_file_for_preview(file_path)
             return image, page_info
             return (
                 None,  # file_input
                 None,  # image_preview
+                '<div class="page-info">파일이 로드되지 않았습니다</div>',  # page_info
                 None,  # processed_image
+                "'문서 처리'를 클릭하여 콘텐츠를 추출하세요...",  # markdown_output
                 None,  # json_output
             )
         share=False,
         debug=True,
         show_error=True
+    )