Spaces:

Knightmovies
/

ScannerUniversalRotator

Sleeping

App Files Community

Knightmovies committed on Sep 21

Commit

5cac9c5

verified ·

1 Parent(s): 28c6b80

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -45

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import pytesseract
 from scipy.spatial import distance as dist
 # ==============================================================================
-# App Configuration
 # ==============================================================================
 st.set_page_config(
     page_title="Document AI Toolkit",
@@ -16,6 +16,19 @@ st.set_page_config(
     layout="wide"
 )
 # ==============================================================================
 # Model Loading (Cached)
 # ==============================================================================
@@ -27,7 +40,7 @@ def load_model():
 model, processor = load_model()
 # ==============================================================================
-# Core Image Processing Functions
 # ==============================================================================
 def order_points(pts):
     xSorted = pts[np.argsort(pts[:, 0]), :]
@@ -71,16 +84,18 @@ def correct_orientation(image):
             return cv2.rotate(image, angle_map[rotation])
         return image
     except Exception:
-        # Fallback to bounding box method if OSD fails
         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
         thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
         orientations = {0: thresh, 90: cv2.rotate(thresh, cv2.ROTATE_90_CLOCKWISE), 180: cv2.rotate(thresh, cv2.ROTATE_180), 270: cv2.rotate(thresh, cv2.ROTATE_90_COUNTERCLOCKWISE)}
         best_rotation, max_horizontal_boxes = 0, -1
         for angle, rotated_img in orientations.items():
-            data = pytesseract.image_to_data(rotated_img, output_type=pytesseract.Output.DICT, timeout=5)
-            horizontal_boxes = sum(1 for i, conf in enumerate(data['conf']) if int(conf) > 10 and data['width'][i] > data['height'][i])
-            if horizontal_boxes > max_horizontal_boxes:
-                max_horizontal_boxes, best_rotation = horizontal_boxes, angle
         angle_map = {90: cv2.ROTATE_90_CLOCKWISE, 180: cv2.ROTATE_180, 270: cv2.ROTATE_90_COUNTERCLOCKWISE}
         return cv2.rotate(image, angle_map[best_rotation]) if best_rotation > 0 else image
@@ -92,9 +107,8 @@ def extract_and_draw_table_structure(image_bgr):
         outputs = model(**inputs)
     target_sizes = torch.tensor([image_pil.size[::-1]])
     results = processor.post_process_object_detection(outputs, threshold=0.6, target_sizes=target_sizes)[0]
     img_with_boxes = image_bgr.copy()
-    colors = {"table row": (0, 255, 0), "table column": (255, 0, 0), "table": (255, 0, 255)} # Red for columns
     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
         class_name = model.config.id2label[label.item()]
         if class_name in colors:
@@ -118,64 +132,71 @@ with st.sidebar:
     st.title("🤖 Document AI Toolkit")
     st.markdown("---")
     if st.session_state.stage == "upload":
         st.header("Step 1: Upload Image")
-        uploaded_file = st.file_uploader("Upload your document image", type=["jpg", "jpeg", "png"], label_visibility="collapsed")
         if uploaded_file:
             file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
             st.session_state.original_image = cv2.imdecode(file_bytes, 1)
             st.session_state.stage = "processing"
             st.rerun()
     elif st.session_state.stage == "processing":
         st.header("Step 2: Pre-process")
-        st.info("Straightening and correcting orientation...")
-        if st.button("▶️ Start Pre-processing"):
-            with st.spinner("Working..."):
                 original_image = st.session_state.original_image
                 straightened = find_and_straighten_document(original_image)
                 image_to_orient = straightened if straightened is not None and straightened.size > 0 else original_image
                 st.session_state.processed_image = correct_orientation(image_to_orient)
             st.session_state.stage = "analysis"
             st.rerun()
     elif st.session_state.stage == "analysis":
         st.header("Step 3: Analyze Table")
-        st.info("Detecting table structure...")
-        if st.button("📊 Find Table Structure"):
             with st.spinner("Running Table Transformer model..."):
                 st.session_state.annotated_image = extract_and_draw_table_structure(st.session_state.processed_image)
             st.session_state.stage = "done"
             st.rerun()
-    if st.session_state.stage != "upload":
-        if st.button("🔄 Start Over"):
-            for key in list(st.session_state.keys()):
-                del st.session_state[key]
-            st.rerun()
 # --- Main Panel Display ---
-st.header("Document Processing Stages")
-if st.session_state.stage == "upload":
-    st.info("Please upload a document image using the sidebar to begin.")
 if st.session_state.original_image is not None:
-    st.subheader("1. Original Image")
-    st.image(cv2.cvtColor(st.session_state.original_image, cv2.COLOR_BGR2RGB), use_container_width=True)
 if st.session_state.processed_image is not None:
-    st.subheader("2. Pre-processed Image")
-    st.image(cv2.cvtColor(st.session_state.processed_image, cv2.COLOR_BGR2RGB), caption="Straightened & Oriented", use_container_width=True)
-if st.session_state.annotated_image is not None:
-    st.subheader("3. Final Analysis")
-    tab1, tab2 = st.tabs(["✅ Corrected Document", "📊 Table Structure"])
-    with tab1:
-        st.image(cv2.cvtColor(st.session_state.processed_image, cv2.COLOR_BGR2RGB), use_container_width=True)
-        _, buf = cv2.imencode(".jpg", st.session_state.processed_image)
-        st.download_button("📥 Download Clean Image", data=buf.tobytes(), file_name="corrected.jpg", mime="image/jpeg")
-    with tab2:
-        st.image(cv2.cvtColor(st.session_state.annotated_image, cv2.COLOR_BGR2RGB), use_container_width=True)

 from scipy.spatial import distance as dist
 # ==============================================================================
+# App Configuration & Styling
 # ==============================================================================
 st.set_page_config(
     page_title="Document AI Toolkit",
     layout="wide"
 )
+# Inject CSS for a centered, fixed-width layout
+st.markdown("""
+    <style>
+    .main .block-container {
+        max-width: 900px;
+        padding-top: 2rem;
+        padding-right: 2rem;
+        padding-left: 2rem;
+        padding-bottom: 2rem;
+    }
+    </style>
+    """, unsafe_allow_html=True)
 # ==============================================================================
 # Model Loading (Cached)
 # ==============================================================================
 model, processor = load_model()
 # ==============================================================================
+# Core Image Processing Functions (Unchanged)
 # ==============================================================================
 def order_points(pts):
     xSorted = pts[np.argsort(pts[:, 0]), :]
             return cv2.rotate(image, angle_map[rotation])
         return image
     except Exception:
         gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
         thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
         orientations = {0: thresh, 90: cv2.rotate(thresh, cv2.ROTATE_90_CLOCKWISE), 180: cv2.rotate(thresh, cv2.ROTATE_180), 270: cv2.rotate(thresh, cv2.ROTATE_90_COUNTERCLOCKWISE)}
         best_rotation, max_horizontal_boxes = 0, -1
         for angle, rotated_img in orientations.items():
+            try:
+                data = pytesseract.image_to_data(rotated_img, output_type=pytesseract.Output.DICT, timeout=5)
+                horizontal_boxes = sum(1 for i, conf in enumerate(data['conf']) if int(conf) > 10 and data['width'][i] > data['height'][i])
+                if horizontal_boxes > max_horizontal_boxes:
+                    max_horizontal_boxes, best_rotation = horizontal_boxes, angle
+            except Exception:
+                continue
         angle_map = {90: cv2.ROTATE_90_CLOCKWISE, 180: cv2.ROTATE_180, 270: cv2.ROTATE_90_COUNTERCLOCKWISE}
         return cv2.rotate(image, angle_map[best_rotation]) if best_rotation > 0 else image
         outputs = model(**inputs)
     target_sizes = torch.tensor([image_pil.size[::-1]])
     results = processor.post_process_object_detection(outputs, threshold=0.6, target_sizes=target_sizes)[0]
     img_with_boxes = image_bgr.copy()
+    colors = {"table row": (0, 255, 0), "table column": (255, 0, 0), "table": (255, 0, 255)}
     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
         class_name = model.config.id2label[label.item()]
         if class_name in colors:
     st.title("🤖 Document AI Toolkit")
     st.markdown("---")
+    if st.button("🔄 Start Over", use_container_width=True):
+        for key in list(st.session_state.keys()):
+            del st.session_state[key]
+        st.rerun()
     if st.session_state.stage == "upload":
         st.header("Step 1: Upload Image")
+        uploaded_file = st.file_uploader("Upload your document", type=["jpg", "jpeg", "png"], label_visibility="collapsed")
         if uploaded_file:
             file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
             st.session_state.original_image = cv2.imdecode(file_bytes, 1)
             st.session_state.stage = "processing"
             st.rerun()
     elif st.session_state.stage == "processing":
         st.header("Step 2: Pre-process")
+        if st.button("▶️ Start Pre-processing", use_container_width=True, type="primary"):
+            with st.spinner("Straightening & correcting orientation..."):
                 original_image = st.session_state.original_image
                 straightened = find_and_straighten_document(original_image)
                 image_to_orient = straightened if straightened is not None and straightened.size > 0 else original_image
                 st.session_state.processed_image = correct_orientation(image_to_orient)
             st.session_state.stage = "analysis"
             st.rerun()
     elif st.session_state.stage == "analysis":
         st.header("Step 3: Analyze Table")
+        if st.button("📊 Find Table Structure", use_container_width=True, type="primary"):
             with st.spinner("Running Table Transformer model..."):
                 st.session_state.annotated_image = extract_and_draw_table_structure(st.session_state.processed_image)
             st.session_state.stage = "done"
             st.rerun()
 # --- Main Panel Display ---
+st.title("Document Processing Workflow")
+# Step 1: Upload
+expander1 = st.expander("Step 1: Upload Original Image", expanded=(st.session_state.stage == "upload"))
+with expander1:
+    if st.session_state.original_image is None:
+        st.info("Please upload a document image using the sidebar to begin.")
+    else:
+        st.image(cv2.cvtColor(st.session_state.original_image, cv2.COLOR_BGR2RGB), use_container_width=True)
+        st.success("Image uploaded successfully.")
+# Step 2: Pre-process
 if st.session_state.original_image is not None:
+    expander2 = st.expander("Step 2: Pre-process Document", expanded=(st.session_state.stage == "processing" or st.session_state.stage == "analysis"))
+    with expander2:
+        if st.session_state.processed_image is None:
+            st.info("Click 'Start Pre-processing' in the sidebar.")
+        else:
+            st.image(cv2.cvtColor(st.session_state.processed_image, cv2.COLOR_BGR2RGB), caption="Straightened & Oriented", use_container_width=True)
+            st.success("Pre-processing complete.")
+# Step 3: Analysis
 if st.session_state.processed_image is not None:
+    expander3 = st.expander("Step 3: Analyze Table Structure", expanded=(st.session_state.stage == "done"))
+    with expander3:
+        if st.session_state.annotated_image is None:
+            st.info("Click 'Find Table Structure' in the sidebar to run the analysis.")
+        else:
+            tab1, tab2 = st.tabs(["✅ Corrected Document", "📊 Table Structure"])
+            with tab1:
+                st.image(cv2.cvtColor(st.session_state.processed_image, cv2.COLOR_BGR2RGB), use_container_width=True)
+                _, buf = cv2.imencode(".jpg", st.session_state.processed_image)
+                st.download_button("📥 Download Clean Image", data=buf.tobytes(), file_name="corrected.jpg", mime="image/jpeg", use_container_width=True)
+            with tab2:
+                st.image(cv2.cvtColor(st.session_state.annotated_image, cv2.COLOR_BGR2RGB), use_container_width=True)
+            st.success("Analysis complete.")