Use whole page and image/result side by side
app.py (CHANGED)
@@ -129,59 +129,59 @@ def predict(model_name, input_img):

(The changes in this hunk appear to be whitespace-only: blank lines and the wrapping of the cv2.polylines call. The function is shown once, as it reads after the commit.)

def process_image(image):
    # Perform inference on an image, select textline only
    results = model(image, classes=0)

    img_cv2 = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    masks = results[0].masks
    polygons = []
    texts = []

    if masks is not None:
        # Get masks data and original image dimensions
        masks = masks.data.cpu().numpy()
        img_height, img_width = img_cv2.shape[:2]

        # Get bounding boxes in xyxy format
        boxes = results[0].boxes.xyxy.cpu().numpy()

        # Sort by y-coordinate of the top-left corner
        sorted_indices = np.argsort(boxes[:, 1])
        masks = masks[sorted_indices]
        boxes = boxes[sorted_indices]

        for i, (mask, box) in enumerate(zip(masks, boxes)):
            # Scale the mask to original image size
            mask = cv2.resize(mask.squeeze(), (img_width, img_height), interpolation=cv2.INTER_LINEAR)
            mask = (mask > 0.5).astype(np.uint8) * 255  # Apply threshold

            # Convert mask to polygon
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if contours:
                # Get the largest contour
                largest_contour = max(contours, key=cv2.contourArea)
                simplified_polygon = simplify_polygons([largest_contour])[0]

                if simplified_polygon is not None:
                    # Crop the image using the bounding box for text recognition
                    x1, y1, x2, y2 = map(int, box)
                    crop_img = img_cv2[y1:y2, x1:x2]
                    crop_pil = Image.fromarray(cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))

                    # Recognize text using PyLaia model
                    predicted = predict('pylaia-samaritan_v1', crop_pil)
                    texts.append(predicted[1]["text"])

                    # Convert polygon to list of points for display
                    poly_points = simplified_polygon.reshape(-1, 2).astype(int).tolist()
                    polygons.append(f"Line {i+1}: {poly_points}")

                    # Draw polygon on the image
                    cv2.polylines(img_cv2, [simplified_polygon.reshape(-1, 1, 2).astype(int)],
                                  True, (0, 255, 0), 2)

    # Convert image back to RGB for display in Streamlit
    img_result = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)

    # Combine polygons and texts into a DataFrame for table display
    table_data = pd.DataFrame({"Polygons": polygons, "Recognized Text": texts})
    return Image.fromarray(img_result), table_data
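Note: simplify_polygons is used above but defined elsewhere in app.py, outside this diff. A minimal sketch of what such a helper could look like, assuming it applies Douglas-Peucker simplification with cv2.approxPolyDP and returns None for degenerate contours; the real helper's tolerance and filtering may differ:

import cv2

def simplify_polygons(contours, tolerance=0.01, min_points=3):
    # Hypothetical sketch, not the app's actual implementation
    simplified = []
    for contour in contours:
        # Scale epsilon by the contour perimeter so the same tolerance
        # works for text lines of different sizes
        epsilon = tolerance * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)
        # Fewer than three points cannot form a polygon; signal with None
        simplified.append(approx if len(approx) >= min_points else None)
    return simplified

cv2.approxPolyDP preserves OpenCV's (N, 1, 2) point layout, which is what the reshape(-1, 2) and reshape(-1, 1, 2) calls above expect.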
@@ -191,6 +191,7 @@ def segment_and_recognize(image):
     return segmented_image, table_data

 # Streamlit app layout
+st.set_page_config(layout="wide")  # Use full page width
 st.title("YOLOv11 Text Line Segmentation & PyLaia Text Recognition on CATMuS/medieval")

 # File uploader

@@ -203,9 +204,13 @@ if uploaded_image is not None:
     if st.button("Segment and Recognize"):
         # Perform segmentation and recognition
         segmented_image, table_data = segment_and_recognize(image)

-        # Display the segmented image
-        st.image(segmented_image, caption="Segmented Image with Polygon Masks", use_container_width=True)
-
-        # Display the table with polygons and recognized text
-        st.table(table_data)
+        # Layout: Image on the left, Table on the right
+        col1, col2 = st.columns([2, 3])  # Adjust the ratio if needed
+
+        with col1:
+            st.image(segmented_image, caption="Segmented Image with Polygon Masks", use_container_width=True)
+
+        with col2:
+            st.table(table_data)
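One caveat on the st.set_page_config addition: Streamlit expects set_page_config to be the first Streamlit command the script executes, and most versions raise a StreamlitAPIException otherwise. Placing it just above st.title works here only because nothing earlier in app.py renders UI. A minimal sketch of the required ordering (the page_title value is illustrative, not part of this commit):

import streamlit as st

# Must run before any other st.* call in the script; the imports and
# function definitions above it are fine because they render nothing
st.set_page_config(
    layout="wide",                  # use the full browser width
    page_title="Text line demo",    # hypothetical, not set by this commit
)

st.title("...")  # all other Streamlit calls come after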
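The list passed to st.columns sets relative widths, so [2, 3] gives the segmented image two fifths of the page and the table three fifths; combined with layout="wide", this is what places the image and the recognition results side by side instead of stacked. A standalone sketch of the same pattern with placeholder data, not taken from app.py:

import pandas as pd
import streamlit as st

st.set_page_config(layout="wide")

# Relative widths: left column 2/5 of the page, right column 3/5
left_col, right_col = st.columns([2, 3])

with left_col:
    # use_container_width=True scales the image to the column width,
    # not the full page, so the two panels stay aligned
    st.image("segmented_page.png",  # placeholder path
             caption="Segmented Image with Polygon Masks",
             use_container_width=True)

with right_col:
    st.table(pd.DataFrame({"Polygons": ["..."], "Recognized Text": ["..."]}))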