Upload 11 files
Browse files- .gitattributes +1 -0
- app.py +211 -0
- catmus-medieval/language_model.arpa.gz +3 -0
- catmus-medieval/language_model.binary +3 -0
- catmus-medieval/lexicon.txt +254 -0
- catmus-medieval/model +0 -0
- catmus-medieval/syms.txt +254 -0
- catmus-medieval/tokens.txt +254 -0
- catmus-medieval/weights.ckpt +3 -0
- model.pt +3 -0
- requirements.txt +22 -0
- runtime.txt +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
catmus-medieval/language_model.binary filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import warnings
|
3 |
+
warnings.simplefilter("ignore", UserWarning)
|
4 |
+
|
5 |
+
from uuid import uuid4
|
6 |
+
from laia.scripts.htr.decode_ctc import run as decode
|
7 |
+
from laia.common.arguments import CommonArgs, DataArgs, TrainerArgs, DecodeArgs
|
8 |
+
import sys
|
9 |
+
from tempfile import NamedTemporaryFile, mkdtemp
|
10 |
+
from pathlib import Path
|
11 |
+
from contextlib import redirect_stdout
|
12 |
+
import re
|
13 |
+
from PIL import Image
|
14 |
+
from bidi.algorithm import get_display
|
15 |
+
import multiprocessing
|
16 |
+
from ultralytics import YOLO
|
17 |
+
import cv2
|
18 |
+
import numpy as np
|
19 |
+
import pandas as pd
|
20 |
+
import logging
|
21 |
+
from typing import List, Optional
|
22 |
+
|
23 |
+
# Keep Lightning quiet: only errors from its logger are reported.
logging.getLogger("lightning.pytorch").setLevel(logging.ERROR)

# Segmentation model, loaded from the weights file shipped next to the app.
# NOTE(review): this runs at module level, so it is presumably re-executed on
# every Streamlit rerun together with the mkdtemp() call below - confirm
# whether caching is wanted.
model = YOLO('model.pt')

# Scratch directory that receives the line crops handed to the recogniser.
images = Path(mkdtemp())

DEFAULT_HEIGHT = 128  # height, in pixels, line crops are resized to
TEXT_DIRECTION = "LTR"  # "RTL" makes predict() reorder the text for display
NUM_WORKERS = multiprocessing.cpu_count()

# One line of decoder output has the shape "<image id> <confidence> <text>".
IMAGE_ID_PATTERN = r"(?P<image_id>[-a-z0-9]{36})"
CONFIDENCE_PATTERN = r"(?P<confidence>[0-9.]+)"  # line-level confidence
TEXT_PATTERN = r"\s*(?P<text>.*)\s*"
LINE_PREDICTION = re.compile(
    " ".join((IMAGE_ID_PATTERN, CONFIDENCE_PATTERN, TEXT_PATTERN))
)
|
38 |
+
|
39 |
+
def get_width(image, height=DEFAULT_HEIGHT):
    """Return the width that preserves *image*'s aspect ratio at *height*.

    Args:
        image: Object exposing ``width`` and ``height`` attributes.
        height: Target height; defaults to ``DEFAULT_HEIGHT``.

    Returns:
        The matching width as a float (callers round it themselves).
    """
    return height * (image.width / image.height)
|
42 |
+
|
43 |
+
def simplify_polygons(polygons: List[np.ndarray], approx_level: float = 0.01) -> List[Optional[np.ndarray]]:
    """Simplify polygon contours using the Douglas-Peucker algorithm.

    Args:
        polygons: List of polygon contours
        approx_level: Tolerance expressed as a fraction of each contour's
            perimeter; higher values mean more simplification

    Returns:
        List with one entry per input contour: the simplified polygon, or
        None when the contour has fewer than 4 points before or after
        simplification
    """
    result = []
    for polygon in polygons:
        # Too few points to describe a usable region.
        if len(polygon) < 4:
            result.append(None)
            continue

        # approxPolyDP takes an absolute tolerance, so scale the relative
        # level by the contour's own perimeter.
        perimeter = cv2.arcLength(polygon, True)
        approx = cv2.approxPolyDP(polygon, approx_level * perimeter, True)

        # squeeze() drops the singleton axes of the returned array.
        result.append(approx.squeeze() if len(approx) >= 4 else None)
    return result
|
67 |
+
|
68 |
+
def predict(model_name, input_img):
    """Transcribe a single text-line image with the bundled PyLaia model.

    Args:
        model_name: Accepted for backward compatibility but ignored; the
            model files are always read from the ``catmus-medieval`` directory.
        input_img: PIL image containing one text line.

    Returns:
        A ``(image, prediction)`` tuple: the line image resized to
        ``DEFAULT_HEIGHT`` and a dict holding the recognised ``"text"`` and
        the line confidence ``"score"`` (kept as the string that was printed
        by the decoder).

    Raises:
        RuntimeError: If the decoder output contains no prediction for the
            image.
    """
    model_dir = 'catmus-medieval'
    temperature = 2.0
    batch_size = 1

    syms_path = f"{model_dir}/syms.txt"
    use_language_model = True
    language_model_params = {"language_model_weight": 1.0}
    if use_language_model:
        language_model_params.update({
            "language_model_path": f"{model_dir}/language_model.binary",
            "lexicon_path": f"{model_dir}/lexicon.txt",
            "tokens_path": f"{model_dir}/tokens.txt",
        })

    common_args = CommonArgs(
        checkpoint="weights.ckpt",
        train_path=f"{model_dir}",
        experiment_dirname="",
    )
    data_args = DataArgs(batch_size=batch_size, color_mode="L")
    trainer_args = TrainerArgs(progress_bar_refresh_rate=0)
    decode_args = DecodeArgs(
        include_img_ids=True,
        join_string="",
        convert_spaces=True,
        print_line_confidence_scores=True,
        print_word_confidence_scores=False,
        temperature=temperature,
        use_language_model=use_language_model,
        **language_model_params,
    )

    # The crop is written as JPEG, which cannot store alpha or palette modes.
    if input_img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        input_img = input_img.convert("RGB")

    # Very narrow crops would round down to a zero width, which resize rejects.
    target_width = max(1, int(get_width(input_img)))
    input_img = input_img.resize((target_width, DEFAULT_HEIGHT))

    image_id = str(uuid4())
    image_path = Path(images) / f"{image_id}.jpg"
    try:
        input_img.save(image_path)
        with NamedTemporaryFile() as pred_stdout, NamedTemporaryFile() as img_list:
            Path(img_list.name).write_text(image_id)

            # The decoder prints its predictions, so capture stdout in a file.
            # The sink is opened in a ``with`` so the handle is always closed,
            # and as UTF-8 because the alphabet is largely non-ASCII.
            with open(pred_stdout.name, mode="w", encoding="utf-8") as sink:
                with redirect_stdout(sink):
                    decode(
                        syms=str(syms_path),
                        img_list=img_list.name,
                        img_dirs=[str(images)],
                        common=common_args,
                        data=data_args,
                        trainer=trainer_args,
                        decode=decode_args,
                        num_workers=1,
                    )
                    sys.stdout.flush()
            output = Path(pred_stdout.name).read_text(encoding="utf-8")
    finally:
        # Remove the crop so the shared scratch directory does not grow with
        # every recognised line.
        image_path.unlink(missing_ok=True)

    for raw_line in output.splitlines():
        line = raw_line.strip()
        # The pattern requires a space after the confidence; an empty
        # transcription leaves none once stripped, so retry with it restored.
        match = LINE_PREDICTION.match(line) or LINE_PREDICTION.match(f"{line} ")
        if match is not None and match.group("image_id") == image_id:
            break
    else:
        raise RuntimeError(
            f"No prediction found for image {image_id} in decoder output: {output!r}"
        )

    score = match.group("confidence")
    text = match.group("text").strip()
    if TEXT_DIRECTION == "RTL":
        text = get_display(text)
    return input_img, {"text": text, "score": score}
|
128 |
+
|
129 |
+
def _largest_polygon(mask, img_width, img_height):
    """Return the simplified outline of the largest region in *mask*, or None."""
    # Scale the mask to original image size
    mask = cv2.resize(mask.squeeze(), (img_width, img_height), interpolation=cv2.INTER_LINEAR)
    mask = (mask > 0.5).astype(np.uint8) * 255  # Apply threshold

    # Convert mask to polygon
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None
    largest_contour = max(contours, key=cv2.contourArea)
    return simplify_polygons([largest_contour])[0]


def process_image(image):
    """Segment the text lines of a page image and transcribe each of them.

    Args:
        image: PIL image of the page.

    Returns:
        A ``(annotated_image, table)`` tuple: a PIL image with the outline of
        every recognised line drawn in green, and a DataFrame with the columns
        ``"Polygons"`` and ``"Recognized Text"`` (one row per line, ordered by
        the top edge of the line's bounding box).
    """
    # Greyscale and palette uploads turn into 2-D arrays, which the colour
    # conversion below rejects, so normalise everything to RGB first.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Perform inference on an image, select textline only
    results = model(image, classes=0)

    img_rgb = np.array(image)  # untouched pixels, used for the line crops
    img_cv2 = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)  # canvas for the overlays
    masks = results[0].masks
    polygons = []
    texts = []

    if masks is not None:
        # Get masks data and original image dimensions
        masks = masks.data.cpu().numpy()
        img_height, img_width = img_cv2.shape[:2]

        # Get bounding boxes in xyxy format
        boxes = results[0].boxes.xyxy.cpu().numpy()

        # Sort by y-coordinate of the top-left corner
        sorted_indices = np.argsort(boxes[:, 1])
        masks = masks[sorted_indices]
        boxes = boxes[sorted_indices]

        for line_no, (mask, box) in enumerate(zip(masks, boxes), start=1):
            simplified_polygon = _largest_polygon(mask, img_width, img_height)
            if simplified_polygon is None:
                continue

            # Clamp the box to the image and skip empty crops: a zero-sized
            # crop cannot be resized for recognition.
            x1, y1, x2, y2 = map(int, box)
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, img_width), min(y2, img_height)
            if x2 <= x1 or y2 <= y1:
                continue

            # Crop from the untouched pixels: the canvas already carries the
            # outlines of earlier lines, which would leak into the crop.
            crop_pil = Image.fromarray(np.ascontiguousarray(img_rgb[y1:y2, x1:x2]))

            # Recognize text using PyLaia model (predict ignores the name)
            predicted = predict('pylaia-samaritan_v1', crop_pil)
            texts.append(predicted[1]["text"])

            # Convert polygon to list of points for display
            poly_points = simplified_polygon.reshape(-1, 2).astype(int).tolist()
            polygons.append(f"Line {line_no}: {poly_points}")

            # Draw polygon on the image
            cv2.polylines(img_cv2, [simplified_polygon.reshape(-1, 1, 2).astype(int)],
                          True, (0, 255, 0), 2)

    # Convert image back to RGB for display in Streamlit
    img_result = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)

    # Combine polygons and texts into a DataFrame for table display
    table_data = pd.DataFrame({"Polygons": polygons, "Recognized Text": texts})
    return Image.fromarray(img_result), table_data
|
188 |
+
|
189 |
+
def segment_and_recognize(image):
    """Run the full pipeline on *image*; thin wrapper around process_image.

    Returns:
        The ``(annotated_image, table)`` pair produced by ``process_image``.
    """
    annotated, rows = process_image(image)
    return annotated, rows
|
192 |
+
|
193 |
+
# Page header
st.title("YOLOv11 Text Line Segmentation & PyLaia Text Recognition on CATMuS/medieval")

# Let the user choose the page image to analyse
uploaded_image = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])

# Everything below needs an uploaded file
if uploaded_image is not None:
    image = Image.open(uploaded_image)

    # The pipeline only runs when the button reports a click
    run_requested = st.button("Segment and Recognize")
    if run_requested:
        segmented_image, table_data = segment_and_recognize(image)

        # Annotated page first, then one table row per recognised line
        st.image(
            segmented_image,
            caption="Segmented Image with Polygon Masks",
            use_container_width=True,
        )
        st.table(table_data)
|
catmus-medieval/language_model.arpa.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:754aca0dfb5b8c96b922e7ae96ad515a4bdd131760a8b59a3648b2c33d41329f
|
3 |
+
size 21600345
|
catmus-medieval/language_model.binary
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d825caa68d8a1d7db10196c9de6591550fc04dec020ddb2c502786330d11c91
|
3 |
+
size 50512864
|
catmus-medieval/lexicon.txt
ADDED
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<ctc> <ctc>
|
2 |
+
# #
|
3 |
+
& &
|
4 |
+
' '
|
5 |
+
( (
|
6 |
+
) )
|
7 |
+
* *
|
8 |
+
+ +
|
9 |
+
, ,
|
10 |
+
- -
|
11 |
+
. .
|
12 |
+
/ /
|
13 |
+
0 0
|
14 |
+
1 1
|
15 |
+
2 2
|
16 |
+
3 3
|
17 |
+
4 4
|
18 |
+
5 5
|
19 |
+
6 6
|
20 |
+
7 7
|
21 |
+
8 8
|
22 |
+
9 9
|
23 |
+
: :
|
24 |
+
; ;
|
25 |
+
= =
|
26 |
+
? ?
|
27 |
+
A A
|
28 |
+
B B
|
29 |
+
C C
|
30 |
+
D D
|
31 |
+
E E
|
32 |
+
F F
|
33 |
+
G G
|
34 |
+
H H
|
35 |
+
I I
|
36 |
+
J J
|
37 |
+
K K
|
38 |
+
L L
|
39 |
+
M M
|
40 |
+
N N
|
41 |
+
O O
|
42 |
+
P P
|
43 |
+
Q Q
|
44 |
+
R R
|
45 |
+
S S
|
46 |
+
T T
|
47 |
+
U U
|
48 |
+
V V
|
49 |
+
W W
|
50 |
+
X X
|
51 |
+
Y Y
|
52 |
+
Z Z
|
53 |
+
[ [
|
54 |
+
] ]
|
55 |
+
^ ^
|
56 |
+
_ _
|
57 |
+
a a
|
58 |
+
b b
|
59 |
+
c c
|
60 |
+
d d
|
61 |
+
e e
|
62 |
+
f f
|
63 |
+
g g
|
64 |
+
h h
|
65 |
+
i i
|
66 |
+
j j
|
67 |
+
k k
|
68 |
+
l l
|
69 |
+
m m
|
70 |
+
n n
|
71 |
+
o o
|
72 |
+
p p
|
73 |
+
q q
|
74 |
+
r r
|
75 |
+
s s
|
76 |
+
t t
|
77 |
+
u u
|
78 |
+
v v
|
79 |
+
w w
|
80 |
+
x x
|
81 |
+
y y
|
82 |
+
z z
|
83 |
+
| |
|
84 |
+
~ ~
|
85 |
+
¬ ¬
|
86 |
+
° °
|
87 |
+
¶ ¶
|
88 |
+
Ø Ø
|
89 |
+
Þ Þ
|
90 |
+
ß ß
|
91 |
+
æ æ
|
92 |
+
ð ð
|
93 |
+
÷ ÷
|
94 |
+
þ þ
|
95 |
+
đ đ
|
96 |
+
ħ ħ
|
97 |
+
ł ł
|
98 |
+
ŧ ŧ
|
99 |
+
ƀ ƀ
|
100 |
+
ƿ ƿ
|
101 |
+
Ƿ Ƿ
|
102 |
+
ɂ ɂ
|
103 |
+
ɨ ɨ
|
104 |
+
ʰ ʰ
|
105 |
+
ʳ ʳ
|
106 |
+
ˡ ˡ
|
107 |
+
ˢ ˢ
|
108 |
+
ˣ ˣ
|
109 |
+
̀ ̀
|
110 |
+
́ ́
|
111 |
+
̂ ̂
|
112 |
+
̃ ̃
|
113 |
+
̇ ̇
|
114 |
+
̈ ̈
|
115 |
+
̌ ̌
|
116 |
+
̓ ̓
|
117 |
+
̔ ̔
|
118 |
+
̧ ̧
|
119 |
+
̨ ̨
|
120 |
+
̵ ̵
|
121 |
+
̶ ̶
|
122 |
+
̽ ̽
|
123 |
+
̾ ̾
|
124 |
+
͂ ͂
|
125 |
+
ͣ ͣ
|
126 |
+
ͤ ͤ
|
127 |
+
ͥ ͥ
|
128 |
+
ͦ ͦ
|
129 |
+
ͧ ͧ
|
130 |
+
ͨ ͨ
|
131 |
+
ͩ ͩ
|
132 |
+
ͪ ͪ
|
133 |
+
ͫ ͫ
|
134 |
+
ͬ ͬ
|
135 |
+
ͭ ͭ
|
136 |
+
ͮ ͮ
|
137 |
+
ͯ ͯ
|
138 |
+
Α Α
|
139 |
+
Β Β
|
140 |
+
Γ Γ
|
141 |
+
Δ Δ
|
142 |
+
Ε Ε
|
143 |
+
Ζ Ζ
|
144 |
+
Η Η
|
145 |
+
Θ Θ
|
146 |
+
Ι Ι
|
147 |
+
Κ Κ
|
148 |
+
Λ Λ
|
149 |
+
Μ Μ
|
150 |
+
Ν Ν
|
151 |
+
Ξ Ξ
|
152 |
+
Ο Ο
|
153 |
+
Π Π
|
154 |
+
Ρ Ρ
|
155 |
+
Σ Σ
|
156 |
+
Τ Τ
|
157 |
+
Υ Υ
|
158 |
+
Φ Φ
|
159 |
+
Χ Χ
|
160 |
+
Ψ Ψ
|
161 |
+
Ω Ω
|
162 |
+
α α
|
163 |
+
β β
|
164 |
+
γ γ
|
165 |
+
δ δ
|
166 |
+
ε ε
|
167 |
+
η η
|
168 |
+
θ θ
|
169 |
+
ι ι
|
170 |
+
κ κ
|
171 |
+
λ λ
|
172 |
+
μ μ
|
173 |
+
ν ν
|
174 |
+
ο ο
|
175 |
+
π π
|
176 |
+
ρ ρ
|
177 |
+
ς ς
|
178 |
+
σ σ
|
179 |
+
τ τ
|
180 |
+
υ υ
|
181 |
+
χ χ
|
182 |
+
ω ω
|
183 |
+
ᛞ ᛞ
|
184 |
+
ᵃ ᵃ
|
185 |
+
ᵇ ᵇ
|
186 |
+
ᵈ ᵈ
|
187 |
+
ᵉ ᵉ
|
188 |
+
ᵍ ᵍ
|
189 |
+
ᵐ ᵐ
|
190 |
+
ᵒ ᵒ
|
191 |
+
ᵖ ᵖ
|
192 |
+
ᵗ ᵗ
|
193 |
+
ᵘ ᵘ
|
194 |
+
ᶜ ᶜ
|
195 |
+
ᶞ ᶞ
|
196 |
+
ᶠ ᶠ
|
197 |
+
ᶻ ᶻ
|
198 |
+
᷑ ᷑
|
199 |
+
᷒ ᷒
|
200 |
+
ᷚ ᷚ
|
201 |
+
ᷜ ᷜ
|
202 |
+
ᷝ ᷝ
|
203 |
+
ᷠ ᷠ
|
204 |
+
ᷤ ᷤ
|
205 |
+
ᷦ ᷦ
|
206 |
+
ᷨ ᷨ
|
207 |
+
ᷫ ᷫ
|
208 |
+
ᷮ ᷮ
|
209 |
+
ẜ ẜ
|
210 |
+
ẞ ẞ
|
211 |
+
† †
|
212 |
+
‸ ‸
|
213 |
+
⁊ ⁊
|
214 |
+
⁋ ⁋
|
215 |
+
⁜ ⁜
|
216 |
+
⁰ ⁰
|
217 |
+
ⁱ ⁱ
|
218 |
+
⁴ ⁴
|
219 |
+
⁷ ⁷
|
220 |
+
⁹ ⁹
|
221 |
+
ⁿ ⁿ
|
222 |
+
℥ ℥
|
223 |
+
♡ ♡
|
224 |
+
❧ ❧
|
225 |
+
⟦ ⟦
|
226 |
+
⟧ ⟧
|
227 |
+
Ꝑ Ꝑ
|
228 |
+
ꝑ ꝑ
|
229 |
+
ꝓ ꝓ
|
230 |
+
ꝗ ꝗ
|
231 |
+
Ꝙ Ꝙ
|
232 |
+
ꝙ ꝙ
|
233 |
+
Ꝟ Ꝟ
|
234 |
+
ꝟ ꝟ
|
235 |
+
ꝥ ꝥ
|
236 |
+
ꝭ ꝭ
|
237 |
+
Ꝯ Ꝯ
|
238 |
+
ꝯ ꝯ
|
239 |
+
ꝰ ꝰ
|
240 |
+
ꝵ ꝵ
|
241 |
+
ꝷ ꝷ
|
242 |
+
Ꞧ Ꞧ
|
243 |
+
|
244 |
+
|
245 |
+
|
246 |
+
|
247 |
+
|
248 |
+
|
249 |
+
|
250 |
+
|
251 |
+
|
252 |
+
𐞥 𐞥
|
253 |
+
<unk> <unk>
|
254 |
+
<space> <space>
|
catmus-medieval/model
ADDED
Binary file (1.52 kB). View file
|
|
catmus-medieval/syms.txt
ADDED
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<ctc> 0
|
2 |
+
# 1
|
3 |
+
& 2
|
4 |
+
' 3
|
5 |
+
( 4
|
6 |
+
) 5
|
7 |
+
* 6
|
8 |
+
+ 7
|
9 |
+
, 8
|
10 |
+
- 9
|
11 |
+
. 10
|
12 |
+
/ 11
|
13 |
+
0 12
|
14 |
+
1 13
|
15 |
+
2 14
|
16 |
+
3 15
|
17 |
+
4 16
|
18 |
+
5 17
|
19 |
+
6 18
|
20 |
+
7 19
|
21 |
+
8 20
|
22 |
+
9 21
|
23 |
+
: 22
|
24 |
+
; 23
|
25 |
+
= 24
|
26 |
+
? 25
|
27 |
+
A 26
|
28 |
+
B 27
|
29 |
+
C 28
|
30 |
+
D 29
|
31 |
+
E 30
|
32 |
+
F 31
|
33 |
+
G 32
|
34 |
+
H 33
|
35 |
+
I 34
|
36 |
+
J 35
|
37 |
+
K 36
|
38 |
+
L 37
|
39 |
+
M 38
|
40 |
+
N 39
|
41 |
+
O 40
|
42 |
+
P 41
|
43 |
+
Q 42
|
44 |
+
R 43
|
45 |
+
S 44
|
46 |
+
T 45
|
47 |
+
U 46
|
48 |
+
V 47
|
49 |
+
W 48
|
50 |
+
X 49
|
51 |
+
Y 50
|
52 |
+
Z 51
|
53 |
+
[ 52
|
54 |
+
] 53
|
55 |
+
^ 54
|
56 |
+
_ 55
|
57 |
+
a 56
|
58 |
+
b 57
|
59 |
+
c 58
|
60 |
+
d 59
|
61 |
+
e 60
|
62 |
+
f 61
|
63 |
+
g 62
|
64 |
+
h 63
|
65 |
+
i 64
|
66 |
+
j 65
|
67 |
+
k 66
|
68 |
+
l 67
|
69 |
+
m 68
|
70 |
+
n 69
|
71 |
+
o 70
|
72 |
+
p 71
|
73 |
+
q 72
|
74 |
+
r 73
|
75 |
+
s 74
|
76 |
+
t 75
|
77 |
+
u 76
|
78 |
+
v 77
|
79 |
+
w 78
|
80 |
+
x 79
|
81 |
+
y 80
|
82 |
+
z 81
|
83 |
+
| 82
|
84 |
+
~ 83
|
85 |
+
¬ 84
|
86 |
+
° 85
|
87 |
+
¶ 86
|
88 |
+
Ø 87
|
89 |
+
Þ 88
|
90 |
+
ß 89
|
91 |
+
æ 90
|
92 |
+
ð 91
|
93 |
+
÷ 92
|
94 |
+
þ 93
|
95 |
+
đ 94
|
96 |
+
ħ 95
|
97 |
+
ł 96
|
98 |
+
ŧ 97
|
99 |
+
ƀ 98
|
100 |
+
ƿ 99
|
101 |
+
Ƿ 100
|
102 |
+
ɂ 101
|
103 |
+
ɨ 102
|
104 |
+
ʰ 103
|
105 |
+
ʳ 104
|
106 |
+
ˡ 105
|
107 |
+
ˢ 106
|
108 |
+
ˣ 107
|
109 |
+
̀ 108
|
110 |
+
́ 109
|
111 |
+
̂ 110
|
112 |
+
̃ 111
|
113 |
+
̇ 112
|
114 |
+
̈ 113
|
115 |
+
̌ 114
|
116 |
+
̓ 115
|
117 |
+
̔ 116
|
118 |
+
̧ 117
|
119 |
+
̨ 118
|
120 |
+
̵ 119
|
121 |
+
̶ 120
|
122 |
+
̽ 121
|
123 |
+
̾ 122
|
124 |
+
͂ 123
|
125 |
+
ͣ 124
|
126 |
+
ͤ 125
|
127 |
+
ͥ 126
|
128 |
+
ͦ 127
|
129 |
+
ͧ 128
|
130 |
+
ͨ 129
|
131 |
+
ͩ 130
|
132 |
+
ͪ 131
|
133 |
+
ͫ 132
|
134 |
+
ͬ 133
|
135 |
+
ͭ 134
|
136 |
+
ͮ 135
|
137 |
+
ͯ 136
|
138 |
+
Α 137
|
139 |
+
Β 138
|
140 |
+
Γ 139
|
141 |
+
Δ 140
|
142 |
+
Ε 141
|
143 |
+
Ζ 142
|
144 |
+
Η 143
|
145 |
+
Θ 144
|
146 |
+
Ι 145
|
147 |
+
Κ 146
|
148 |
+
Λ 147
|
149 |
+
Μ 148
|
150 |
+
Ν 149
|
151 |
+
Ξ 150
|
152 |
+
Ο 151
|
153 |
+
Π 152
|
154 |
+
Ρ 153
|
155 |
+
Σ 154
|
156 |
+
Τ 155
|
157 |
+
Υ 156
|
158 |
+
Φ 157
|
159 |
+
Χ 158
|
160 |
+
Ψ 159
|
161 |
+
Ω 160
|
162 |
+
α 161
|
163 |
+
β 162
|
164 |
+
γ 163
|
165 |
+
δ 164
|
166 |
+
ε 165
|
167 |
+
η 166
|
168 |
+
θ 167
|
169 |
+
ι 168
|
170 |
+
κ 169
|
171 |
+
λ 170
|
172 |
+
μ 171
|
173 |
+
ν 172
|
174 |
+
ο 173
|
175 |
+
π 174
|
176 |
+
ρ 175
|
177 |
+
ς 176
|
178 |
+
σ 177
|
179 |
+
τ 178
|
180 |
+
υ 179
|
181 |
+
χ 180
|
182 |
+
ω 181
|
183 |
+
ᛞ 182
|
184 |
+
ᵃ 183
|
185 |
+
ᵇ 184
|
186 |
+
ᵈ 185
|
187 |
+
ᵉ 186
|
188 |
+
ᵍ 187
|
189 |
+
ᵐ 188
|
190 |
+
ᵒ 189
|
191 |
+
ᵖ 190
|
192 |
+
ᵗ 191
|
193 |
+
ᵘ 192
|
194 |
+
ᶜ 193
|
195 |
+
ᶞ 194
|
196 |
+
ᶠ 195
|
197 |
+
ᶻ 196
|
198 |
+
᷑ 197
|
199 |
+
᷒ 198
|
200 |
+
ᷚ 199
|
201 |
+
ᷜ 200
|
202 |
+
ᷝ 201
|
203 |
+
ᷠ 202
|
204 |
+
ᷤ 203
|
205 |
+
ᷦ 204
|
206 |
+
ᷨ 205
|
207 |
+
ᷫ 206
|
208 |
+
ᷮ 207
|
209 |
+
ẜ 208
|
210 |
+
ẞ 209
|
211 |
+
† 210
|
212 |
+
‸ 211
|
213 |
+
⁊ 212
|
214 |
+
⁋ 213
|
215 |
+
⁜ 214
|
216 |
+
⁰ 215
|
217 |
+
ⁱ 216
|
218 |
+
⁴ 217
|
219 |
+
⁷ 218
|
220 |
+
⁹ 219
|
221 |
+
ⁿ 220
|
222 |
+
℥ 221
|
223 |
+
♡ 222
|
224 |
+
❧ 223
|
225 |
+
⟦ 224
|
226 |
+
⟧ 225
|
227 |
+
Ꝑ 226
|
228 |
+
ꝑ 227
|
229 |
+
ꝓ 228
|
230 |
+
ꝗ 229
|
231 |
+
Ꝙ 230
|
232 |
+
ꝙ 231
|
233 |
+
Ꝟ 232
|
234 |
+
ꝟ 233
|
235 |
+
ꝥ 234
|
236 |
+
ꝭ 235
|
237 |
+
Ꝯ 236
|
238 |
+
ꝯ 237
|
239 |
+
ꝰ 238
|
240 |
+
ꝵ 239
|
241 |
+
ꝷ 240
|
242 |
+
Ꞧ 241
|
243 |
+
242
|
244 |
+
243
|
245 |
+
244
|
246 |
+
245
|
247 |
+
246
|
248 |
+
247
|
249 |
+
248
|
250 |
+
249
|
251 |
+
250
|
252 |
+
𐞥 251
|
253 |
+
<unk> 252
|
254 |
+
<space> 253
|
catmus-medieval/tokens.txt
ADDED
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<ctc>
|
2 |
+
#
|
3 |
+
&
|
4 |
+
'
|
5 |
+
(
|
6 |
+
)
|
7 |
+
*
|
8 |
+
+
|
9 |
+
,
|
10 |
+
-
|
11 |
+
.
|
12 |
+
/
|
13 |
+
0
|
14 |
+
1
|
15 |
+
2
|
16 |
+
3
|
17 |
+
4
|
18 |
+
5
|
19 |
+
6
|
20 |
+
7
|
21 |
+
8
|
22 |
+
9
|
23 |
+
:
|
24 |
+
;
|
25 |
+
=
|
26 |
+
?
|
27 |
+
A
|
28 |
+
B
|
29 |
+
C
|
30 |
+
D
|
31 |
+
E
|
32 |
+
F
|
33 |
+
G
|
34 |
+
H
|
35 |
+
I
|
36 |
+
J
|
37 |
+
K
|
38 |
+
L
|
39 |
+
M
|
40 |
+
N
|
41 |
+
O
|
42 |
+
P
|
43 |
+
Q
|
44 |
+
R
|
45 |
+
S
|
46 |
+
T
|
47 |
+
U
|
48 |
+
V
|
49 |
+
W
|
50 |
+
X
|
51 |
+
Y
|
52 |
+
Z
|
53 |
+
[
|
54 |
+
]
|
55 |
+
^
|
56 |
+
_
|
57 |
+
a
|
58 |
+
b
|
59 |
+
c
|
60 |
+
d
|
61 |
+
e
|
62 |
+
f
|
63 |
+
g
|
64 |
+
h
|
65 |
+
i
|
66 |
+
j
|
67 |
+
k
|
68 |
+
l
|
69 |
+
m
|
70 |
+
n
|
71 |
+
o
|
72 |
+
p
|
73 |
+
q
|
74 |
+
r
|
75 |
+
s
|
76 |
+
t
|
77 |
+
u
|
78 |
+
v
|
79 |
+
w
|
80 |
+
x
|
81 |
+
y
|
82 |
+
z
|
83 |
+
|
|
84 |
+
~
|
85 |
+
¬
|
86 |
+
°
|
87 |
+
¶
|
88 |
+
Ø
|
89 |
+
Þ
|
90 |
+
ß
|
91 |
+
æ
|
92 |
+
ð
|
93 |
+
÷
|
94 |
+
þ
|
95 |
+
đ
|
96 |
+
ħ
|
97 |
+
ł
|
98 |
+
ŧ
|
99 |
+
ƀ
|
100 |
+
ƿ
|
101 |
+
Ƿ
|
102 |
+
ɂ
|
103 |
+
ɨ
|
104 |
+
ʰ
|
105 |
+
ʳ
|
106 |
+
ˡ
|
107 |
+
ˢ
|
108 |
+
ˣ
|
109 |
+
̀
|
110 |
+
́
|
111 |
+
̂
|
112 |
+
̃
|
113 |
+
̇
|
114 |
+
̈
|
115 |
+
̌
|
116 |
+
̓
|
117 |
+
̔
|
118 |
+
̧
|
119 |
+
̨
|
120 |
+
̵
|
121 |
+
̶
|
122 |
+
̽
|
123 |
+
̾
|
124 |
+
͂
|
125 |
+
ͣ
|
126 |
+
ͤ
|
127 |
+
ͥ
|
128 |
+
ͦ
|
129 |
+
ͧ
|
130 |
+
ͨ
|
131 |
+
ͩ
|
132 |
+
ͪ
|
133 |
+
ͫ
|
134 |
+
ͬ
|
135 |
+
ͭ
|
136 |
+
ͮ
|
137 |
+
ͯ
|
138 |
+
Α
|
139 |
+
Β
|
140 |
+
Γ
|
141 |
+
Δ
|
142 |
+
Ε
|
143 |
+
Ζ
|
144 |
+
Η
|
145 |
+
Θ
|
146 |
+
Ι
|
147 |
+
Κ
|
148 |
+
Λ
|
149 |
+
Μ
|
150 |
+
Ν
|
151 |
+
Ξ
|
152 |
+
Ο
|
153 |
+
Π
|
154 |
+
Ρ
|
155 |
+
Σ
|
156 |
+
Τ
|
157 |
+
Υ
|
158 |
+
Φ
|
159 |
+
Χ
|
160 |
+
Ψ
|
161 |
+
Ω
|
162 |
+
α
|
163 |
+
β
|
164 |
+
γ
|
165 |
+
δ
|
166 |
+
ε
|
167 |
+
η
|
168 |
+
θ
|
169 |
+
ι
|
170 |
+
κ
|
171 |
+
λ
|
172 |
+
μ
|
173 |
+
ν
|
174 |
+
ο
|
175 |
+
π
|
176 |
+
ρ
|
177 |
+
ς
|
178 |
+
σ
|
179 |
+
τ
|
180 |
+
υ
|
181 |
+
χ
|
182 |
+
ω
|
183 |
+
ᛞ
|
184 |
+
ᵃ
|
185 |
+
ᵇ
|
186 |
+
ᵈ
|
187 |
+
ᵉ
|
188 |
+
ᵍ
|
189 |
+
ᵐ
|
190 |
+
ᵒ
|
191 |
+
ᵖ
|
192 |
+
ᵗ
|
193 |
+
ᵘ
|
194 |
+
ᶜ
|
195 |
+
ᶞ
|
196 |
+
ᶠ
|
197 |
+
ᶻ
|
198 |
+
᷑
|
199 |
+
᷒
|
200 |
+
ᷚ
|
201 |
+
ᷜ
|
202 |
+
ᷝ
|
203 |
+
ᷠ
|
204 |
+
ᷤ
|
205 |
+
ᷦ
|
206 |
+
ᷨ
|
207 |
+
ᷫ
|
208 |
+
ᷮ
|
209 |
+
ẜ
|
210 |
+
ẞ
|
211 |
+
†
|
212 |
+
‸
|
213 |
+
⁊
|
214 |
+
⁋
|
215 |
+
⁜
|
216 |
+
⁰
|
217 |
+
ⁱ
|
218 |
+
⁴
|
219 |
+
⁷
|
220 |
+
⁹
|
221 |
+
ⁿ
|
222 |
+
℥
|
223 |
+
♡
|
224 |
+
❧
|
225 |
+
⟦
|
226 |
+
⟧
|
227 |
+
Ꝑ
|
228 |
+
ꝑ
|
229 |
+
ꝓ
|
230 |
+
ꝗ
|
231 |
+
Ꝙ
|
232 |
+
ꝙ
|
233 |
+
Ꝟ
|
234 |
+
ꝟ
|
235 |
+
ꝥ
|
236 |
+
ꝭ
|
237 |
+
Ꝯ
|
238 |
+
ꝯ
|
239 |
+
ꝰ
|
240 |
+
ꝵ
|
241 |
+
ꝷ
|
242 |
+
Ꞧ
|
243 |
+
|
244 |
+
|
245 |
+
|
246 |
+
|
247 |
+
|
248 |
+
|
249 |
+
|
250 |
+
|
251 |
+
|
252 |
+
𐞥
|
253 |
+
<unk>
|
254 |
+
<space>
|
catmus-medieval/weights.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d86b655a3f4095026a5a8c73241a308935dc22ad81e6b3bef4fe5765ace6a400
|
3 |
+
size 43382236
|
model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25996a37e4842b7203abe0f5c969c514aafacbb9864023cb5f057e79e282fe31
|
3 |
+
size 45140975
|
requirements.txt
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
imagesize==1.4.1
|
2 |
+
jsonargparse[signatures]==4.7
|
3 |
+
matplotlib==3.9.2
|
4 |
+
natsort==8.4.0
|
5 |
+
pytorch-lightning==1.4.2
|
6 |
+
scipy==1.11.3
|
7 |
+
textdistance==4.6.3
|
8 |
+
torch>=1.13,<1.14
|
9 |
+
torchaudio>=0.13,<0.14
|
10 |
+
torchmetrics<0.8.0
|
11 |
+
torchvision>=0.14,<0.15
|
12 |
+
mdutils==1.6.0
|
13 |
+
prettytable==3.11.0
|
14 |
+
python-bidi==0.6.0
|
15 |
+
pylaia==1.1.2
|
16 |
+
numpy==1.26.4
|
17 |
+
opencv-python==4.11.0.86
|
18 |
+
pillow==11.2.0
|
19 |
+
python-bidi==0.6.0
|
20 |
+
streamlit==1.44.0
|
21 |
+
transformers==4.50.3
|
22 |
+
ultralytics==8.3.99
|
runtime.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python-3.10
|