Use whole page and image/result side by side
app.py (CHANGED)
@@ -129,59 +129,59 @@ def predict(model_name, input_img):

(The changes in this hunk appear to be whitespace-only: blank lines and the wrapping of the cv2.polylines call. The function is shown once, as it reads after the commit.)

def process_image(image):
    # Perform inference on an image, select textline only
    results = model(image, classes=0)

    img_cv2 = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    masks = results[0].masks
    polygons = []
    texts = []

    if masks is not None:
        # Get masks data and original image dimensions
        masks = masks.data.cpu().numpy()
        img_height, img_width = img_cv2.shape[:2]

        # Get bounding boxes in xyxy format
        boxes = results[0].boxes.xyxy.cpu().numpy()

        # Sort by y-coordinate of the top-left corner
        sorted_indices = np.argsort(boxes[:, 1])
        masks = masks[sorted_indices]
        boxes = boxes[sorted_indices]

        for i, (mask, box) in enumerate(zip(masks, boxes)):
            # Scale the mask to original image size
            mask = cv2.resize(mask.squeeze(), (img_width, img_height), interpolation=cv2.INTER_LINEAR)
            mask = (mask > 0.5).astype(np.uint8) * 255  # Apply threshold

            # Convert mask to polygon
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if contours:
                # Get the largest contour
                largest_contour = max(contours, key=cv2.contourArea)
                simplified_polygon = simplify_polygons([largest_contour])[0]

                if simplified_polygon is not None:
                    # Crop the image using the bounding box for text recognition
                    x1, y1, x2, y2 = map(int, box)
                    crop_img = img_cv2[y1:y2, x1:x2]
                    crop_pil = Image.fromarray(cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))

                    # Recognize text using PyLaia model
                    predicted = predict('pylaia-samaritan_v1', crop_pil)
                    texts.append(predicted[1]["text"])

                    # Convert polygon to list of points for display
                    poly_points = simplified_polygon.reshape(-1, 2).astype(int).tolist()
                    polygons.append(f"Line {i+1}: {poly_points}")

                    # Draw polygon on the image
                    cv2.polylines(img_cv2, [simplified_polygon.reshape(-1, 1, 2).astype(int)],
                                  True, (0, 255, 0), 2)

    # Convert image back to RGB for display in Streamlit
    img_result = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)

    # Combine polygons and texts into a DataFrame for table display
    table_data = pd.DataFrame({"Polygons": polygons, "Recognized Text": texts})
    return Image.fromarray(img_result), table_data
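Note: simplify_polygons is used above but defined elsewhere in app.py, outside this diff. A minimal sketch of what such a helper could look like, assuming it applies Douglas-Peucker simplification with cv2.approxPolyDP and returns None for degenerate contours; the real helper's tolerance and filtering may differ:

import cv2

def simplify_polygons(contours, tolerance=0.01, min_points=3):
    # Hypothetical sketch, not the app's actual implementation
    simplified = []
    for contour in contours:
        # Scale epsilon by the contour perimeter so the same tolerance
        # works for text lines of different sizes
        epsilon = tolerance * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        # Fewer than three points cannot form a polygon; signal with None
        simplified.append(approx if len(approx) >= min_points else None)
    return simplified

cv2.approxPolyDP preserves OpenCV's (N, 1, 2) point layout, which is what the reshape(-1, 2) and reshape(-1, 1, 2) calls above expect.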
@@ -191,6 +191,7 @@ def segment_and_recognize(image):
     return segmented_image, table_data

 # Streamlit app layout
+st.set_page_config(layout="wide")  # Use full page width
 st.title("YOLOv11 Text Line Segmentation & PyLaia Text Recognition on CATMuS/medieval")

 # File uploader

@@ -203,9 +204,13 @@ if uploaded_image is not None:
     if st.button("Segment and Recognize"):
         # Perform segmentation and recognition
         segmented_image, table_data = segment_and_recognize(image)

-        # Display the segmented image
-        st.image(segmented_image, caption="Segmented Image with Polygon Masks", use_container_width=True)
-
-        # Display the table with polygons and recognized text
-        st.table(table_data)
+        # Layout: Image on the left, Table on the right
+        col1, col2 = st.columns([2, 3])  # Adjust the ratio if needed
+
+        with col1:
+            st.image(segmented_image, caption="Segmented Image with Polygon Masks", use_container_width=True)
+
+        with col2:
+            st.table(table_data)
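One caveat on the st.set_page_config addition: Streamlit expects set_page_config to be the first Streamlit command the script executes, and most versions raise a StreamlitAPIException otherwise. Placing it just above st.title works here only because nothing earlier in app.py renders UI. A minimal sketch of the required ordering (the page_title value is illustrative, not part of this commit):

import streamlit as st

# Must run before any other st.* call in the script; the imports and
# function definitions above it are fine because they render nothing
st.set_page_config(
    layout="wide",                  # use the full browser width
    page_title="Text line demo",    # hypothetical, not set by this commit
)

st.title("...")  # all other Streamlit calls come after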
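The list passed to st.columns sets relative widths, so [2, 3] gives the segmented image two fifths of the page and the table three fifths; combined with layout="wide", this is what places the image and the recognition results side by side instead of stacked. A standalone sketch of the same pattern with placeholder data, not taken from app.py:

import pandas as pd
import streamlit as st

st.set_page_config(layout="wide")

# Relative widths: left column 2/5 of the page, right column 3/5
left_col, right_col = st.columns([2, 3])

with left_col:
    # use_container_width=True scales the image to the column width,
    # not the full page, so the two panels stay aligned
    st.image("segmented_page.png",  # placeholder path
             caption="Segmented Image with Polygon Masks",
             use_container_width=True)

with right_col:
    st.table(pd.DataFrame({"Polygons": ["..."], "Recognized Text": ["..."]}))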