Upload 3 files
Browse files- english_phonetic_dict.pkl +3 -0
- english_scansion_tool.pkl +3 -0
- poetry_scansion_app.py +241 -0
english_phonetic_dict.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75aceecc03186feb38f8d2040873b475af440c01e00fc862e4ba41643b3e074d
|
3 |
+
size 7490207
|
english_scansion_tool.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2dd099e8c5f43cc8b7bb5e44a0f4f6a2adb962d15deb5ed461054439f929bc4
|
3 |
+
size 8088295
|
poetry_scansion_app.py
ADDED
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# English Poetry Scansion Tool for macOS - v6.0 (Definitive)
|
2 |
+
#
|
3 |
+
# This definitive version incorporates a weighted scoring system for meter detection,
|
4 |
+
# correctly prioritizing content words over function words. It also adds a new,
|
5 |
+
# separate summary section for standard IPA phonetic transcriptions.
|
6 |
+
#
|
7 |
+
# This version should fix the scansion accuracy issues and fulfill all feature requests.
|
8 |
+
# Author: Gemini
|
9 |
+
# Date: October 1, 2025
|
10 |
+
|
11 |
+
import tkinter as tk
|
12 |
+
from tkinter import scrolledtext, messagebox, ttk
|
13 |
+
import pickle
|
14 |
+
import re
|
15 |
+
import os
|
16 |
+
|
17 |
+
class EnglishScansion:
    """Dictionary-driven scansion engine for lines of English verse.

    A pickled pronunciation dictionary maps lower-case words to a sequence of
    pronunciations; only the first pronunciation is used, and it is unpacked
    as an ``(arpabet_phoneme_list, ipa_string)`` pair.  From those phonemes the
    class derives a lexical stress string ('1' = stressed, '0' = unstressed),
    picks the best-fitting meter for a line, and renders the line in several
    annotated formats.
    """

    def __init__(self, dict_path):
        """Load the pronunciation dictionary and set up the lookup tables.

        Args:
            dict_path: Path to the pickled pronunciation dictionary.

        Raises:
            FileNotFoundError: If ``dict_path`` does not exist.
        """
        if not os.path.exists(dict_path):
            raise FileNotFoundError(f"The dictionary file '{dict_path}' was not found. Please run the converter (v5) first.")

        # SECURITY NOTE: unpickling executes arbitrary code embedded in the
        # file.  Only load dictionary files that you generated yourself or
        # obtained from a source you trust.
        with open(dict_path, 'rb') as f:
            self.pronunciation_dict = pickle.load(f)

        # Foot templates: '1' marks a stressed syllable, '0' an unstressed one.
        self.METERS = {'Iambic': '01', 'Trochaic': '10', 'Anapestic': '001', 'Dactylic': '100', 'Amphibrach': '010'}
        self.VOWELS = "aeiouyAEIOUY"
        self.ACCENT_MAP = {'a': 'á', 'e': 'é', 'i': 'í', 'o': 'ó', 'u': 'ú', 'y': 'ý',
                           'A': 'Á', 'E': 'É', 'I': 'Í', 'O': 'Ó', 'U': 'Ú', 'Y': 'Ý'}
        # A phoneme counts as a vowel (one syllable) when it is two or three
        # capital letters followed by a stress digit, e.g. "AH1".
        self.VOWEL_SOUNDS = re.compile(r'[A-Z]{2,3}[0-9]')
        # Mismatches on these words are penalised half as much when scoring.
        self.FUNCTION_WORDS = {
            'a', 'an', 'the', 'and', 'but', 'or', 'for', 'nor', 'on', 'at', 'to', 'from',
            'by', 'with', 'in', 'of', 'is', 'am', 'are', 'was', 'were', 'be', 'being',
            'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'shall',
            'should', 'can', 'could', 'may', 'might', 'must', 'as', 'if', 'so', 'my', 'your',
            'his', 'her', 'its', 'our', 'their', 'me', 'you', 'him', 'it', 'us', 'them'
        }

    @staticmethod
    def _normalize(word):
        """Return the lower-cased word with surrounding punctuation removed."""
        return word.lower().strip(".,;:!?()\"'")

    @staticmethod
    def _tile(foot, length):
        """Repeat the foot template and cut it to exactly ``length`` syllables."""
        return (foot * (length // len(foot) + 1))[:length]

    def get_word_data(self, word):
        """Look up a word and return its stress and transcription data.

        Args:
            word: A single token; case and surrounding punctuation are ignored.

        Returns:
            A 4-tuple ``(stress_pattern, arpabet, syllable_count, ipa)``.
            ``stress_pattern`` has one character per vowel phoneme: '1' when
            the phoneme carries stress digit 1 or 2, otherwise '0'.
            ``arpabet`` is the space-joined phoneme list.  For an empty or
            unknown word the tuple is ``(None, "<word>", 0, None)``.
        """
        lookup_word = self._normalize(word)
        if not lookup_word or lookup_word not in self.pronunciation_dict:
            return None, f"<{lookup_word}>", 0, None

        # Only the first listed pronunciation is used.
        arpabet_list, ipa_raw = self.pronunciation_dict[lookup_word][0]
        stresses = [
            '1' if phoneme.endswith(('1', '2')) else '0'
            for phoneme in arpabet_list
            if self.VOWEL_SOUNDS.match(phoneme)
        ]
        return "".join(stresses), ' '.join(arpabet_list), len(stresses), ipa_raw

    def analyze_line(self, line):
        """Pick the meter that best fits one line of verse.

        Every meter template is tiled to the line's syllable count and
        compared with the dictionary stresses.  Each mismatching syllable
        costs 1.0, or 0.5 when it belongs to a function word; the total is
        divided by the syllable count.  The lowest score wins, and ties go to
        the meter listed first in ``METERS``.  Words missing from the
        dictionary contribute no syllables.

        Args:
            line: One line of text.

        Returns:
            A dict with keys ``original``, ``meter``, ``score``, ``syllables``
            and ``final_stress_pattern`` (the winning template tiled to the
            line length).  A line with no known syllables yields meter
            ``"Unknown"``, score ``1.0`` and an empty pattern.
        """
        tokens = re.findall(r"[\w']+|[.,!?;:]+", line)

        lexical_stress_pattern, word_map = "", []
        for token in tokens:
            if not re.match(r"[\w']+", token):
                continue
            pattern, _, syllables, _ = self.get_word_data(token)
            if pattern is None:
                continue
            lexical_stress_pattern += pattern
            # Store the normalised form so a quoted word such as "'The" is
            # still recognised as a function word during scoring.
            word_map.extend([self._normalize(token)] * syllables)

        line_syllables = len(lexical_stress_pattern)
        if line_syllables == 0:
            return {"original": line, "meter": "Unknown", "score": 1.0, "syllables": 0, "final_stress_pattern": ""}

        scores = {}
        for name, foot in self.METERS.items():
            template = self._tile(foot, line_syllables)
            mismatch_score = sum(
                0.5 if owner in self.FUNCTION_WORDS else 1.0
                for actual, expected, owner in zip(lexical_stress_pattern, template, word_map)
                if actual != expected
            )
            scores[name] = mismatch_score / line_syllables

        best_meter_name = min(scores, key=scores.get)
        best_meter_score = scores[best_meter_name]

        winning_foot = self.METERS[best_meter_name]
        final_stress_pattern = self._tile(winning_foot, line_syllables)

        # Round half up with integer arithmetic.  The built-in round() rounds
        # halves to the nearest even number, which would report 5 iambic
        # syllables as 2 feet but 7 as 4.
        foot_len = len(winning_foot)
        num_feet = (2 * line_syllables + foot_len) // (2 * foot_len)
        foot_map = {0: "0-foot", 1: "monometer", 2: "dimeter", 3: "trimeter", 4: "tetrameter", 5: "pentameter", 6: "hexameter"}
        meter_desc = f"{best_meter_name} {foot_map.get(num_feet, f'{num_feet}-foot')}"

        return {"original": line, "meter": meter_desc, "score": best_meter_score, "syllables": line_syllables, "final_stress_pattern": final_stress_pattern}

    def format_line(self, line, analysis_result, format_type):
        """Render a line in one of the supported annotation formats.

        The line is split into word tokens and separator runs (spaces and
        punctuation).  Separators are copied through unchanged, so the
        original spacing and punctuation of the line are preserved; the
        result is stripped of leading and trailing whitespace.

        Args:
            line: The original line of text.
            analysis_result: The dict returned by ``analyze_line`` for this
                line.  Only ``final_stress_pattern`` is read, and only for
                the ``'plus'`` and ``'accent'`` formats.
            format_type: ``'arpabet'`` (each word as ``[phonemes]``),
                ``'ipa'`` (the IPA string, falling back to the bracketed
                form when none is available), ``'plus'`` (``+`` before every
                word holding a stressed syllable) or ``'accent'`` (an acute
                accent on the first letter of each stressed vowel group).

        Returns:
            The formatted line.  In the stress formats, words missing from
            the dictionary are shown as ``<word>``.
        """
        tokens = re.findall(r"[\w']+|[^\w']+", line)
        output_parts = []

        if format_type in ('arpabet', 'ipa'):
            for token in tokens:
                if re.match(r"[\w']+", token):
                    _, arpabet, _, ipa = self.get_word_data(token)
                    output_parts.append(ipa if format_type == 'ipa' and ipa else f"[{arpabet}]")
                else:
                    output_parts.append(token)
            # Separator tokens already carry the spacing, so join with "".
            return "".join(output_parts).strip()

        final_stress = analysis_result['final_stress_pattern']
        syllable_cursor = 0
        for token in tokens:
            if not re.match(r"[\w']+", token):
                output_parts.append(token)
                continue

            lexical_pattern, _, num_syllables, _ = self.get_word_data(token)
            if lexical_pattern is None:
                # Unknown words consumed no syllables during analysis, so the
                # cursor must not advance here either.
                output_parts.append(f"<{token}>")
                continue

            word_stress_segment = final_stress[syllable_cursor:syllable_cursor + num_syllables]

            if format_type == 'plus':
                output_parts.append(f"+{token}" if '1' in word_stress_segment else token)
            elif format_type == 'accent':
                letters = list(token)
                vowel_groups = re.finditer(r'[aeiouy]+', token, re.IGNORECASE)
                # Heuristic: the n-th written vowel group stands for the n-th
                # syllable; zip() stops when either sequence runs out.
                for stress_char, group in zip(word_stress_segment, vowel_groups):
                    if stress_char == '1':
                        pos = group.start()
                        letters[pos] = self.ACCENT_MAP.get(letters[pos], letters[pos])
                output_parts.append("".join(letters))

            syllable_cursor += num_syllables

        # Separator tokens already carry the spacing, so join with "".
        return "".join(output_parts).strip()
|
138 |
+
|
139 |
+
class ScansionApp:
    """Tkinter front end for the scansion engine.

    The window has an input box for the poem, an "Analyze Poem" button, and a
    read-only, colour-coded output pane.
    """

    def __init__(self, root):
        """Build the widgets and load the pronunciation dictionary.

        Args:
            root: The Tk root window that hosts the application.

        If the scanner cannot be constructed, the error is shown in a message
        box and the root window is destroyed.
        """
        self.root = root
        self.root.title("English Poetry Scansion Tool")
        self.root.geometry("1000x900")
        self.VOWELS = "aeiouyAEIOUY"

        self.style = ttk.Style()
        # Use the 'aqua' theme when Tk offers it, else fall back to 'default'.
        try:
            self.style.theme_use('aqua')
        except tk.TclError:
            self.style.theme_use('default')
        self.style.configure('TFrame', background='#ECECEC')
        self.style.configure('TButton', font=('Helvetica Neue', 13, 'bold'), padding=10)
        self.style.configure('TLabel', font=('Helvetica Neue', 14), background='#ECECEC')
        self.style.configure('Header.TLabel', font=('Helvetica Neue', 18, 'bold'))

        self.main_frame = ttk.Frame(root, padding="20")
        self.main_frame.pack(fill=tk.BOTH, expand=True)

        ttk.Label(self.main_frame, text="Poetry Scansion Analyzer", style='Header.TLabel').pack(pady=(0, 20))
        self.input_text = scrolledtext.ScrolledText(self.main_frame, height=8, font=("Menlo", 12), wrap=tk.WORD)
        self.input_text.pack(fill=tk.BOTH, expand=True, pady=5)
        self.input_text.insert(tk.END, "But I will ignite\nIn your eyes a fire\nNow I give you strength\nNow I give you power")

        ttk.Button(self.main_frame, text="Analyze Poem", command=self.perform_scan).pack(pady=15)

        self.output_text = tk.Text(self.main_frame, font=("Menlo", 12), wrap=tk.WORD, background="black", relief=tk.SOLID, borderwidth=1, padx=10, pady=10)
        self.output_text.pack(fill=tk.BOTH, expand=True, pady=5)

        # Colour tags used by render_color_line and the summary sections.
        self.output_text.tag_configure('vowel', foreground="#9370DB")
        self.output_text.tag_configure('stressed_vowel', foreground="#FF474C", font=("Menlo", 12, "bold"))
        self.output_text.tag_configure('consonant', foreground="#FFC700")
        self.output_text.tag_configure('punctuation', foreground="#32CD32")
        self.output_text.tag_configure('info', foreground="white")
        self.output_text.tag_configure('separator', foreground="#555555")
        self.output_text.tag_configure('header', foreground="white", font=("Menlo", 14, "bold", "underline"), justify='center')

        # The pane stays read-only except while perform_scan is writing to it.
        self.output_text.config(state=tk.DISABLED)

        # Startup boundary: any failure to build the scanner is reported to
        # the user and ends the application.
        try:
            self.scanner = EnglishScansion(dict_path="english_phonetic_dict.pkl")
        except Exception as e:
            messagebox.showerror("Fatal Error", str(e))
            self.root.destroy()

    def perform_scan(self):
        """Analyse the poem in the input box and rewrite the output pane.

        For each non-blank line this writes the '+'-marked line, the accented
        line, the ARPAbet transcription and a meter/score summary, followed by
        a separator.  After all lines come two summary sections: the accented
        poem and the IPA transcription.  Blank input is ignored.
        """
        poem_text = self.input_text.get("1.0", tk.END)
        if not poem_text.strip():
            return

        self.output_text.config(state=tk.NORMAL)
        try:
            self.output_text.delete("1.0", tk.END)

            accented_poem_lines, ipa_poem_lines = [], []

            for line in poem_text.strip().split('\n'):
                if not line.strip():
                    self.output_text.insert(tk.END, '\n')
                    continue

                analysis = self.scanner.analyze_line(line)

                accented_line = self.scanner.format_line(line, analysis, 'accent')
                accented_poem_lines.append(accented_line)
                ipa_poem_lines.append(self.scanner.format_line(line, analysis, 'ipa'))

                self.render_color_line(self.scanner.format_line(line, analysis, 'plus') + "\n")
                self.render_color_line(accented_line + "\n")
                self.output_text.insert(tk.END, self.scanner.format_line(line, analysis, 'arpabet') + "\n", 'info')

                info_str = f"Meter: {analysis['meter']} | Syllables: {analysis['syllables']} | Mismatch Score: {analysis['score']:.2f}\n"
                self.output_text.insert(tk.END, info_str, 'info')
                self.output_text.insert(tk.END, "-" * 70 + "\n", 'separator')

            # Render Summaries
            self.output_text.insert(tk.END, "\n\n")
            self.output_text.insert(tk.END, "Stressed Poem Summary\n", 'header')
            self.output_text.insert(tk.END, "\n")
            for line in accented_poem_lines:
                self.render_color_line(line + "\n")

            self.output_text.insert(tk.END, "\n\n")
            self.output_text.insert(tk.END, "IPA Phonetic Transcription\n", 'header')
            self.output_text.insert(tk.END, "\n")
            for line in ipa_poem_lines:
                self.output_text.insert(tk.END, line + "\n", 'info')
        finally:
            # Re-lock the pane even when analysis raises, so a failed scan
            # never leaves the output editable.
            self.output_text.config(state=tk.DISABLED)

    def _tag_for(self, char):
        """Return the colour tag name for a single output character."""
        if char in "áéíóúýÁÉÍÓÚÝ":
            return 'stressed_vowel'
        if char in self.VOWELS:
            return 'vowel'
        if char.isalpha():
            return 'consonant'
        if char in "<>":
            return 'separator'
        if char in ".,;:!?'\"":
            return 'punctuation'
        if char == '+':
            return 'stressed_vowel'
        return 'info'

    def render_color_line(self, text):
        """Append ``text`` to the output pane, colouring each character.

        Consecutive characters that share a tag are inserted in one call
        rather than one call per character; the resulting text and tag
        ranges are the same.
        """
        run_chars, run_tag = [], None
        for char in text:
            tag = self._tag_for(char)
            if run_chars and tag != run_tag:
                self.output_text.insert(tk.END, "".join(run_chars), run_tag)
                run_chars = []
            run_tag = tag
            run_chars.append(char)
        if run_chars:
            self.output_text.insert(tk.END, "".join(run_chars), run_tag)
|
237 |
+
|
238 |
+
if __name__ == "__main__":
    # Script entry point: create the Tk root window, build the application on
    # it, then hand control to the Tk event loop.
    root = tk.Tk()
    app = ScansionApp(root)
    root.mainloop()
|