johnlockejrr commited on
Commit
d5d8604
·
verified ·
1 Parent(s): 067557b

Use whole page and image/result side by side

Browse files
Files changed (1) hide show
  1. app.py +24 -19
app.py CHANGED
@@ -129,59 +129,59 @@ def predict(model_name, input_img):
129
def process_image(image):
    """Segment text lines in *image* with YOLO and recognize each with PyLaia.

    Returns a PIL image with line polygons drawn on it, plus a DataFrame
    holding the polygon points and the recognized text, one row per line.
    """
    # Run inference; classes=0 restricts detection to the textline class.
    results = model(image, classes=0)

    img_cv2 = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    masks = results[0].masks
    polygons = []
    texts = []

    if masks is not None:
        # Mask tensors and the original image dimensions.
        masks = masks.data.cpu().numpy()
        img_height, img_width = img_cv2.shape[:2]

        # Bounding boxes in xyxy format, sorted top-to-bottom by the
        # y-coordinate of the top-left corner so lines come out in
        # reading order.
        boxes = results[0].boxes.xyxy.cpu().numpy()
        order = np.argsort(boxes[:, 1])
        masks = masks[order]
        boxes = boxes[order]

        for i, (mask, box) in enumerate(zip(masks, boxes)):
            # Upscale the low-resolution mask to the original image size,
            # then binarize at 0.5.
            mask = cv2.resize(mask.squeeze(), (img_width, img_height), interpolation=cv2.INTER_LINEAR)
            mask = (mask > 0.5).astype(np.uint8) * 255

            # Trace the mask outline; keep only the largest contour.
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            if not contours:
                continue
            largest_contour = max(contours, key=cv2.contourArea)
            simplified_polygon = simplify_polygons([largest_contour])[0]
            if simplified_polygon is None:
                continue

            # Crop the line via its bounding box for text recognition.
            x1, y1, x2, y2 = map(int, box)
            crop_img = img_cv2[y1:y2, x1:x2]
            crop_pil = Image.fromarray(cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))

            # Recognize text using the PyLaia model.
            predicted = predict('pylaia-samaritan_v1', crop_pil)
            texts.append(predicted[1]["text"])

            # Record the polygon points for the display table.
            poly_points = simplified_polygon.reshape(-1, 2).astype(int).tolist()
            polygons.append(f"Line {i+1}: {poly_points}")

            # Draw the polygon onto the working image.
            cv2.polylines(img_cv2, [simplified_polygon.reshape(-1, 1, 2).astype(int)],
                          True, (0, 255, 0), 2)

    # Back to RGB for display in Streamlit.
    img_result = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)

    # Combine polygons and texts into a DataFrame for table display.
    table_data = pd.DataFrame({"Polygons": polygons, "Recognized Text": texts})
    return Image.fromarray(img_result), table_data
@@ -191,6 +191,7 @@ def segment_and_recognize(image):
191
  return segmented_image, table_data
192
 
193
  # Streamlit app layout
 
194
  st.title("YOLOv11 Text Line Segmentation & PyLaia Text Recognition on CATMuS/medieval")
195
 
196
  # File uploader
@@ -203,9 +204,13 @@ if uploaded_image is not None:
203
  if st.button("Segment and Recognize"):
204
  # Perform segmentation and recognition
205
  segmented_image, table_data = segment_and_recognize(image)
 
 
 
 
 
 
 
 
 
206
 
207
- # Display the segmented image
208
- st.image(segmented_image, caption="Segmented Image with Polygon Masks", use_container_width=True)
209
-
210
- # Display the table with polygons and recognized text
211
- st.table(table_data)
 
129
def process_image(image):
    """Segment text lines in *image* with YOLO and recognize each with PyLaia.

    Parameters
    ----------
    image : PIL.Image.Image
        Input page image (RGB).

    Returns
    -------
    tuple[PIL.Image.Image, pandas.DataFrame]
        The page with line polygons drawn on it, and a DataFrame with one
        row per detected line: its polygon points and the recognized text.
    """
    # Run inference; classes=0 restricts detection to the textline class.
    results = model(image, classes=0)

    img_cv2 = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    masks = results[0].masks
    polygons = []
    texts = []

    if masks is not None:
        # Mask tensors and the original image dimensions.
        masks = masks.data.cpu().numpy()
        img_height, img_width = img_cv2.shape[:2]

        # Bounding boxes in xyxy format, sorted top-to-bottom by the
        # y-coordinate of the top-left corner so lines come out in
        # reading order.
        boxes = results[0].boxes.xyxy.cpu().numpy()
        sorted_indices = np.argsort(boxes[:, 1])
        masks = masks[sorted_indices]
        boxes = boxes[sorted_indices]

        for i, (mask, box) in enumerate(zip(masks, boxes)):
            # Upscale the low-resolution mask to the original image size,
            # then binarize at 0.5.
            mask = cv2.resize(mask.squeeze(), (img_width, img_height), interpolation=cv2.INTER_LINEAR)
            mask = (mask > 0.5).astype(np.uint8) * 255  # Apply threshold

            # Trace the mask outline; keep only the largest contour.
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if contours:
                largest_contour = max(contours, key=cv2.contourArea)
                simplified_polygon = simplify_polygons([largest_contour])[0]

                if simplified_polygon is not None:
                    # Crop the line via its bounding box for recognition.
                    # Clamp to the image bounds first: predicted boxes can
                    # spill slightly outside the frame, and an out-of-range
                    # or inverted slice would yield an empty crop that makes
                    # Image.fromarray raise.
                    x1, y1, x2, y2 = map(int, box)
                    x1, y1 = max(0, x1), max(0, y1)
                    x2, y2 = min(img_width, x2), min(img_height, y2)
                    if x2 <= x1 or y2 <= y1:
                        continue  # degenerate box: nothing to recognize
                    crop_img = img_cv2[y1:y2, x1:x2]
                    crop_pil = Image.fromarray(cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))

                    # Recognize text using the PyLaia model.
                    predicted = predict('pylaia-samaritan_v1', crop_pil)
                    texts.append(predicted[1]["text"])

                    # Record the polygon points for the display table.
                    poly_points = simplified_polygon.reshape(-1, 2).astype(int).tolist()
                    polygons.append(f"Line {i+1}: {poly_points}")

                    # Draw the polygon onto the working image.
                    cv2.polylines(img_cv2, [simplified_polygon.reshape(-1, 1, 2).astype(int)],
                                  True, (0, 255, 0), 2)

    # Back to RGB for display in Streamlit.
    img_result = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)

    # Combine polygons and texts into a DataFrame for table display.
    table_data = pd.DataFrame({"Polygons": polygons, "Recognized Text": texts})
    return Image.fromarray(img_result), table_data
 
191
  return segmented_image, table_data
192
 
193
# Streamlit app layout
# NOTE(review): Streamlit requires st.set_page_config to be the first
# Streamlit command executed in the script — confirm no st.* call runs
# before this point in the full file.
st.set_page_config(layout="wide")  # Use full page width
st.title("YOLOv11 Text Line Segmentation & PyLaia Text Recognition on CATMuS/medieval")

# File uploader
 
204
  if st.button("Segment and Recognize"):
205
  # Perform segmentation and recognition
206
  segmented_image, table_data = segment_and_recognize(image)
207
+
208
+ # Layout: Image on the left, Table on the right
209
+ col1, col2 = st.columns([2, 3]) # Adjust the ratio if needed
210
+
211
+ with col1:
212
+ st.image(segmented_image, caption="Segmented Image with Polygon Masks", use_container_width=True)
213
+
214
+ with col2:
215
+ st.table(table_data)
216