Spaces:

Knightmovies
/

ScannerUniversalRotator

Sleeping

App Files Community

Knightmovies committed on Sep 21

Commit

28c6b80

verified ·

1 Parent(s): 65c6a60

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -105

app.py CHANGED Viewed

@@ -8,34 +8,30 @@ import pytesseract
 from scipy.spatial import distance as dist
 # ==============================================================================
-# App Configuration & Model Loading
 # ==============================================================================
 st.set_page_config(
     page_title="Document AI Toolkit",
     page_icon="🤖",
     layout="wide"
 )
-# Use Streamlit's caching to load the model only once.
 @st.cache_resource
 def load_model():
     """Loads the Table Transformer model and processor."""
-    st.write("Cache miss: Loading Table Transformer model...")
-    processor = DetrImageProcessor.from_pretrained("microsoft/table-transformer-structure-recognition")
-    model = TableTransformerForObjectDetection.from_pretrained("microsoft/table-transformer-structure-recognition")
-    return processor, model
-processor, model = load_model()
 # ==============================================================================
-# Core Image Processing Functions (Unchanged)
 # ==============================================================================
 def order_points(pts):
     xSorted = pts[np.argsort(pts[:, 0]), :]
-    leftMost = xSorted[:2, :]
-    rightMost = xSorted[2:, :]
     leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
     (tl, bl) = leftMost
     D = dist.cdist(tl[np.newaxis], rightMost, "euclidean")[0]
@@ -53,8 +49,7 @@ def perspective_transform(image, pts):
     maxHeight = max(int(heightA), int(heightB))
     dst = np.array([[0, 0], [maxWidth - 1, 0], [maxWidth - 1, maxHeight - 1], [0, maxHeight - 1]], dtype="float32")
     M = cv2.getPerspectiveTransform(rect, dst)
-    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
-    return warped
 def find_and_straighten_document(image):
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
@@ -63,127 +58,124 @@ def find_and_straighten_document(image):
     if not contours: return None
     page_contour = max(contours, key=cv2.contourArea)
     if cv2.contourArea(page_contour) < (image.shape[0] * image.shape[1] * 0.1): return None
-    rect = cv2.minAreaRect(page_contour)
-    box = cv2.boxPoints(rect)
     return perspective_transform(image, box)
 def correct_orientation(image):
     try:
-        osd = pytesseract.image_to_osd(image, output_type=pytesseract.Output.DICT)
         rotation = osd['rotate']
-        if rotation in [90, 180, 270]:
-            if rotation == 90:
-                rotated_image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
-            elif rotation == 180:
-                rotated_image = cv2.rotate(image, cv2.ROTATE_180)
-            else: # 270
-                rotated_image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
-            return rotated_image
-    except Exception as e:
-        st.warning(f"OSD check failed: {e}. Using original orientation.")
-    return image
 def extract_and_draw_table_structure(image_bgr):
     image_pil = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
     inputs = processor(images=image_pil, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
     target_sizes = torch.tensor([image_pil.size[::-1]])
-    results = processor.post_process_object_detection(outputs, threshold=0.7, target_sizes=target_sizes)[0]
     img_with_boxes = image_bgr.copy()
-    colors = {"table row": (0, 255, 0), "table column": (0, 0, 255), "table": (255, 0, 255)}
     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
         class_name = model.config.id2label[label.item()]
         if class_name in colors:
             xmin, ymin, xmax, ymax = [int(val) for val in box.tolist()]
-            color = colors[class_name]
-            cv2.rectangle(img_with_boxes, (xmin, ymin), (xmax, ymax), color, 2)
     return img_with_boxes
 # ==============================================================================
-# UI Functions for Each Step
 # ==============================================================================
-def initialize_state():
-    """Initializes the session state."""
-    if "stage" not in st.session_state:
-        st.session_state.stage = "upload"
-        st.session_state.original_image = None
-        st.session_state.processed_image = None
-def reset_app():
-    """Resets the app to the initial upload stage."""
-    for key in st.session_state.keys():
-        del st.session_state[key]
-    initialize_state()
-# --- Main App UI ---
-initialize_state()
-st.title("🤖 Document AI Toolkit")
-st.markdown("---")
-# Use columns for a centered and constrained layout
-left_col, main_col, right_col = st.columns([1, 4, 1])
-with main_col:
-    # --- STAGE 1: UPLOAD ---
     if st.session_state.stage == "upload":
-        st.header("Step 1: Upload Your Document")
-        uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
         if uploaded_file:
             file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
             st.session_state.original_image = cv2.imdecode(file_bytes, 1)
-            st.image(cv2.cvtColor(st.session_state.original_image, cv2.COLOR_BGR2RGB), caption="Original Upload", use_container_width=True)
-            if st.button("▶️ Start Pre-processing"):
-                st.session_state.stage = "process"
-                st.rerun()
-    # --- STAGE 2: PRE-PROCESSING ---
-    elif st.session_state.stage == "process":
-        st.header("Step 2: Pre-processing Result")
-        with st.spinner("Straightening and correcting orientation..."):
-            original_image = st.session_state.original_image
-            straightened = find_and_straighten_document(original_image)
-            image_to_orient = straightened if straightened is not None and straightened.size > 0 else original_image
-            st.session_state.processed_image = correct_orientation(image_to_orient)
-        st.image(cv2.cvtColor(st.session_state.processed_image, cv2.COLOR_BGR2RGB), caption="Corrected Document", use_container_width=True)
-        st.info("The document has been straightened and oriented.")
         if st.button("📊 Find Table Structure"):
-            st.session_state.stage = "analyze"
             st.rerun()
-        if st.button("↩️ Upload New Image"):
-            reset_app()
             st.rerun()
-    # --- STAGE 3: ANALYSIS ---
-    elif st.session_state.stage == "analyze":
-        st.header("Step 3: Table Structure Analysis")
-        processed_image = st.session_state.processed_image
-        with st.spinner("Running Table Transformer model... This can take a moment."):
-            annotated_image = extract_and_draw_table_structure(processed_image)
-        st.subheader("Final Results")
-        # Display results side-by-side
-        res_col1, res_col2 = st.columns(2)
-        with res_col1:
-            st.image(cv2.cvtColor(processed_image, cv2.COLOR_BGR2RGB), caption="Cleaned Document", use_container_width=True)
-            _, buf = cv2.imencode(".jpg", processed_image)
-            st.download_button(
-                label="📥 Download Clean Image",
-                data=buf.tobytes(),
-                file_name="corrected_document.jpg",
-                mime="image/jpeg",
-            )
-        with res_col2:
-            st.image(cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB), caption="Detected Table Structure", use_container_width=True)
-        if st.button("🔄 Start Over"):
-            reset_app()
-            st.rerun()

 from scipy.spatial import distance as dist
 # ==============================================================================
+# App Configuration
 # ==============================================================================
 st.set_page_config(
     page_title="Document AI Toolkit",
     page_icon="🤖",
     layout="wide"
 )
+# ==============================================================================
+# Model Loading (Cached)
+# ==============================================================================
 @st.cache_resource
 def load_model():
     """Loads the Table Transformer model and processor."""
+    return TableTransformerForObjectDetection.from_pretrained("microsoft/table-transformer-structure-recognition"), DetrImageProcessor.from_pretrained("microsoft/table-transformer-structure-recognition")
+model, processor = load_model()
 # ==============================================================================
+# Core Image Processing Functions
 # ==============================================================================
 def order_points(pts):
     xSorted = pts[np.argsort(pts[:, 0]), :]
+    leftMost, rightMost = xSorted[:2, :], xSorted[2:, :]
     leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
     (tl, bl) = leftMost
     D = dist.cdist(tl[np.newaxis], rightMost, "euclidean")[0]
     maxHeight = max(int(heightA), int(heightB))
     dst = np.array([[0, 0], [maxWidth - 1, 0], [maxWidth - 1, maxHeight - 1], [0, maxHeight - 1]], dtype="float32")
     M = cv2.getPerspectiveTransform(rect, dst)
+    return cv2.warpPerspective(image, M, (maxWidth, maxHeight))
 def find_and_straighten_document(image):
     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
     if not contours: return None
     page_contour = max(contours, key=cv2.contourArea)
     if cv2.contourArea(page_contour) < (image.shape[0] * image.shape[1] * 0.1): return None
+    box = cv2.boxPoints(cv2.minAreaRect(page_contour))
     return perspective_transform(image, box)
 def correct_orientation(image):
+    """Robust orientation correction using a cascade approach."""
     try:
+        osd = pytesseract.image_to_osd(image, output_type=pytesseract.Output.DICT, timeout=5)
         rotation = osd['rotate']
+        if rotation > 0:
+            angle_map = {90: cv2.ROTATE_90_COUNTERCLOCKWISE, 180: cv2.ROTATE_180, 270: cv2.ROTATE_90_CLOCKWISE}
+            return cv2.rotate(image, angle_map[rotation])
+        return image
+    except Exception:
+        # Fallback to bounding box method if OSD fails
+        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
+        orientations = {0: thresh, 90: cv2.rotate(thresh, cv2.ROTATE_90_CLOCKWISE), 180: cv2.rotate(thresh, cv2.ROTATE_180), 270: cv2.rotate(thresh, cv2.ROTATE_90_COUNTERCLOCKWISE)}
+        best_rotation, max_horizontal_boxes = 0, -1
+        for angle, rotated_img in orientations.items():
+            data = pytesseract.image_to_data(rotated_img, output_type=pytesseract.Output.DICT, timeout=5)
+            horizontal_boxes = sum(1 for i, conf in enumerate(data['conf']) if int(conf) > 10 and data['width'][i] > data['height'][i])
+            if horizontal_boxes > max_horizontal_boxes:
+                max_horizontal_boxes, best_rotation = horizontal_boxes, angle
+        angle_map = {90: cv2.ROTATE_90_CLOCKWISE, 180: cv2.ROTATE_180, 270: cv2.ROTATE_90_COUNTERCLOCKWISE}
+        return cv2.rotate(image, angle_map[best_rotation]) if best_rotation > 0 else image
 def extract_and_draw_table_structure(image_bgr):
+    """Detects table structure with the Table Transformer model and draws the boxes using OpenCV."""
     image_pil = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
     inputs = processor(images=image_pil, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
     target_sizes = torch.tensor([image_pil.size[::-1]])
+    results = processor.post_process_object_detection(outputs, threshold=0.6, target_sizes=target_sizes)[0]
     img_with_boxes = image_bgr.copy()
+    colors = {"table row": (0, 255, 0), "table column": (255, 0, 0), "table": (255, 0, 255)} # NOTE: tuples are BGR here, so (255, 0, 0) draws columns in blue, not red
     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
         class_name = model.config.id2label[label.item()]
         if class_name in colors:
             xmin, ymin, xmax, ymax = [int(val) for val in box.tolist()]
+            cv2.rectangle(img_with_boxes, (xmin, ymin), (xmax, ymax), colors[class_name], 2)
     return img_with_boxes
 # ==============================================================================
+# Streamlit UI
 # ==============================================================================
+# --- Session State Management ---
+if "stage" not in st.session_state:
+    st.session_state.stage = "upload"
+    st.session_state.original_image = None
+    st.session_state.processed_image = None
+    st.session_state.annotated_image = None
+# --- Sidebar Controls ---
+with st.sidebar:
+    st.title("🤖 Document AI Toolkit")
+    st.markdown("---")
     if st.session_state.stage == "upload":
+        st.header("Step 1: Upload Image")
+        uploaded_file = st.file_uploader("Upload your document image", type=["jpg", "jpeg", "png"], label_visibility="collapsed")
         if uploaded_file:
             file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
             st.session_state.original_image = cv2.imdecode(file_bytes, 1)
+            st.session_state.stage = "processing"
+            st.rerun()
+    elif st.session_state.stage == "processing":
+        st.header("Step 2: Pre-process")
+        st.info("Straightening and correcting orientation...")
+        if st.button("▶️ Start Pre-processing"):
+            with st.spinner("Working..."):
+                original_image = st.session_state.original_image
+                straightened = find_and_straighten_document(original_image)
+                image_to_orient = straightened if straightened is not None and straightened.size > 0 else original_image
+                st.session_state.processed_image = correct_orientation(image_to_orient)
+            st.session_state.stage = "analysis"
+            st.rerun()
+    elif st.session_state.stage == "analysis":
+        st.header("Step 3: Analyze Table")
+        st.info("Detecting table structure...")
         if st.button("📊 Find Table Structure"):
+            with st.spinner("Running Table Transformer model..."):
+                st.session_state.annotated_image = extract_and_draw_table_structure(st.session_state.processed_image)
+            st.session_state.stage = "done"
             st.rerun()
+    if st.session_state.stage != "upload":
+        if st.button("🔄 Start Over"):
+            for key in list(st.session_state.keys()):
+                del st.session_state[key]
             st.rerun()
+# --- Main Panel Display ---
+st.header("Document Processing Stages")
+if st.session_state.stage == "upload":
+    st.info("Please upload a document image using the sidebar to begin.")
+if st.session_state.original_image is not None:
+    st.subheader("1. Original Image")
+    st.image(cv2.cvtColor(st.session_state.original_image, cv2.COLOR_BGR2RGB), use_container_width=True)
+if st.session_state.processed_image is not None:
+    st.subheader("2. Pre-processed Image")
+    st.image(cv2.cvtColor(st.session_state.processed_image, cv2.COLOR_BGR2RGB), caption="Straightened & Oriented", use_container_width=True)
+if st.session_state.annotated_image is not None:
+    st.subheader("3. Final Analysis")
+    tab1, tab2 = st.tabs(["✅ Corrected Document", "📊 Table Structure"])
+    with tab1:
+        st.image(cv2.cvtColor(st.session_state.processed_image, cv2.COLOR_BGR2RGB), use_container_width=True)
+        _, buf = cv2.imencode(".jpg", st.session_state.processed_image)
+        st.download_button("📥 Download Clean Image", data=buf.tobytes(), file_name="corrected.jpg", mime="image/jpeg")
+    with tab2:
+        st.image(cv2.cvtColor(st.session_state.annotated_image, cv2.COLOR_BGR2RGB), use_container_width=True)