AlekseyCalvin committed on
Commit
c06feca
·
verified ·
1 Parent(s): ddd1c1d

Upload 3 files

Browse files
english_phonetic_dict.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75aceecc03186feb38f8d2040873b475af440c01e00fc862e4ba41643b3e074d
3
+ size 7490207
english_scansion_tool.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2dd099e8c5f43cc8b7bb5e44a0f4f6a2adb962d15deb5ed461054439f929bc4
3
+ size 8088295
poetry_scansion_app.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # English Poetry Scansion Tool for macOS - v6.0 (Definitive)
2
+ #
3
+ # This definitive version incorporates a weighted scoring system for meter detection,
4
+ # correctly prioritizing content words over function words. It also adds a new,
5
+ # separate summary section for standard IPA phonetic transcriptions.
6
+ #
7
+ # This version should fix the scansion accuracy issues and fulfill all feature requests.
8
+ # Author: Gemini
9
+ # Date: October 1, 2025
10
+
11
+ import tkinter as tk
12
+ from tkinter import scrolledtext, messagebox, ttk
13
+ import pickle
14
+ import re
15
+ import os
16
+
17
class EnglishScansion:
    """Dictionary-driven scansion of English verse.

    Words are looked up in a pickled pronunciation dictionary. As used here,
    each entry is a sequence whose first item unpacks to
    ``(arpabet_phoneme_list, ipa_string)``. Lexical stress is read from the
    trailing digit of the ARPABET vowel phonemes, and each line is matched
    against a small set of candidate meters.
    """

    # Candidate meters as repeating foot templates ('1' = stressed syllable).
    METERS = {'Iambic': '01', 'Trochaic': '10', 'Anapestic': '001',
              'Dactylic': '100', 'Amphibrach': '010'}
    VOWELS = "aeiouyAEIOUY"
    ACCENT_MAP = {'a': 'á', 'e': 'é', 'i': 'í', 'o': 'ó', 'u': 'ú', 'y': 'ý',
                  'A': 'Á', 'E': 'É', 'I': 'Í', 'O': 'Ó', 'U': 'Ú', 'Y': 'Ý'}
    # A vowel phoneme: two or three capitals followed by a stress digit.
    VOWEL_SOUNDS = re.compile(r'[A-Z]{2,3}[0-9]')
    # Stress mismatches on these words are penalised at half weight.
    FUNCTION_WORDS = {
        'a', 'an', 'the', 'and', 'but', 'or', 'for', 'nor', 'on', 'at', 'to', 'from',
        'by', 'with', 'in', 'of', 'is', 'am', 'are', 'was', 'were', 'be', 'being',
        'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'shall',
        'should', 'can', 'could', 'may', 'might', 'must', 'as', 'if', 'so', 'my', 'your',
        'his', 'her', 'its', 'our', 'their', 'me', 'you', 'him', 'it', 'us', 'them',
    }

    _FOOT_NAMES = {0: "0-foot", 1: "monometer", 2: "dimeter", 3: "trimeter",
                   4: "tetrameter", 5: "pentameter", 6: "hexameter"}
    _FORMATS = ('arpabet', 'ipa', 'plus', 'accent')
    _STRIP_CHARS = ".,;:!?()\"'"
    _WORD_RE = re.compile(r"[\w']+")

    def __init__(self, dict_path):
        """Load the pronunciation dictionary from *dict_path*.

        Raises:
            FileNotFoundError: if *dict_path* does not exist.
        """
        if not os.path.exists(dict_path):
            raise FileNotFoundError(f"The dictionary file '{dict_path}' was not found. Please run the converter (v5) first.")

        # SECURITY: unpickling can execute arbitrary code. Only load
        # dictionary files that come from a trusted source.
        with open(dict_path, 'rb') as f:
            self.pronunciation_dict = pickle.load(f)

    @classmethod
    def _normalize(cls, word):
        """Return the lower-cased, punctuation-stripped dictionary key for *word*."""
        return word.lower().strip(cls._STRIP_CHARS)

    @staticmethod
    def _template(foot, length):
        """Repeat *foot* and truncate it to exactly *length* syllables."""
        return (foot * (length // len(foot) + 1))[:length]

    def get_word_data(self, word):
        """Look up *word* and describe its first listed pronunciation.

        Returns:
            ``(stress_pattern, arpabet, syllable_count, ipa)``. For a word that
            is empty after normalisation or missing from the dictionary the
            result is ``(None, "<word>", 0, None)``.
        """
        lookup_word = self._normalize(word)
        if not lookup_word or lookup_word not in self.pronunciation_dict:
            return None, f"<{lookup_word}>", 0, None

        arpabet_list, ipa_raw = self.pronunciation_dict[lookup_word][0]
        # One syllable per vowel phoneme; primary (1) and secondary (2)
        # stress both count as stressed.
        stress_pattern = "".join(
            '1' if phoneme.endswith(('1', '2')) else '0'
            for phoneme in arpabet_list
            if self.VOWEL_SOUNDS.match(phoneme)
        )
        return stress_pattern, ' '.join(arpabet_list), len(stress_pattern), ipa_raw

    def analyze_line(self, line):
        """Pick the best-fitting meter for *line*.

        Returns:
            A dict with keys ``original``, ``meter``, ``score`` (weighted
            mismatch fraction, lower is better), ``syllables`` and
            ``final_stress_pattern`` (the winning meter's template cut to the
            line's syllable count). Words missing from the dictionary are
            ignored; a line with no known syllables yields meter ``"Unknown"``.
        """
        patterns, word_map = [], []
        for token in re.findall(r"[\w']+|[.,!?;:]+", line):
            if not self._WORD_RE.match(token):
                continue
            pattern, _, syllables, _ = self.get_word_data(token)
            if pattern is None:
                continue
            patterns.append(pattern)
            # Use the same normalisation as the dictionary lookup so quoted
            # words such as "'the" are still recognised as function words.
            word_map.extend([self._normalize(token)] * syllables)

        lexical_stress_pattern = "".join(patterns)
        line_syllables = len(lexical_stress_pattern)
        if line_syllables == 0:
            return {"original": line, "meter": "Unknown", "score": 1.0, "syllables": 0, "final_stress_pattern": ""}

        # Weighted scoring: a mismatch on a function word costs half as much.
        scores = {}
        for name, foot in self.METERS.items():
            template = self._template(foot, line_syllables)
            penalty = sum(
                0.5 if owner in self.FUNCTION_WORDS else 1.0
                for actual, expected, owner in zip(lexical_stress_pattern, template, word_map)
                if actual != expected
            )
            scores[name] = penalty / line_syllables

        # Ties resolve to the meter listed first in METERS.
        best_meter_name = min(scores, key=scores.get)
        final_stress_pattern = self._template(self.METERS[best_meter_name], line_syllables)

        # Count feet by stressed positions rather than round(syllables / foot):
        # round() uses banker's rounding (round(2.5) == 2 but round(3.5) == 4),
        # which labelled catalectic and feminine-ending lines inconsistently.
        num_feet = final_stress_pattern.count('1')
        foot_name = self._FOOT_NAMES.get(num_feet, f"{num_feet}-foot")

        return {"original": line, "meter": f"{best_meter_name} {foot_name}",
                "score": scores[best_meter_name], "syllables": line_syllables,
                "final_stress_pattern": final_stress_pattern}

    def format_line(self, line, analysis_result, format_type):
        """Render *line* in one of the supported output formats.

        Args:
            line: the original line of verse.
            analysis_result: the dict returned by ``analyze_line`` for *line*;
                only ``final_stress_pattern`` is read (and only for the
                ``'plus'`` and ``'accent'`` formats).
            format_type: ``'arpabet'`` (bracketed phonemes), ``'ipa'`` (IPA
                string, falling back to bracketed ARPABET when no IPA is
                stored), ``'plus'`` (``+`` before words carrying a stress) or
                ``'accent'`` (acute accent on the stressed vowel).

        Returns:
            The formatted line. Original spacing and punctuation are kept.

        Raises:
            ValueError: if *format_type* is not one of the four formats.
        """
        if format_type not in self._FORMATS:
            raise ValueError(f"Unknown format_type: {format_type!r}")

        # Tokens alternate between word runs and everything else (spaces,
        # punctuation), so concatenating them reproduces the original layout.
        tokens = re.findall(r"[\w']+|[^\w']+", line)
        output_parts = []

        if format_type in ('arpabet', 'ipa'):
            for token in tokens:
                if self._WORD_RE.match(token):
                    _, arpabet, _, ipa = self.get_word_data(token)
                    output_parts.append(ipa if format_type == 'ipa' and ipa else f"[{arpabet}]")
                else:
                    output_parts.append(token)
            return "".join(output_parts)

        final_stress = analysis_result['final_stress_pattern']
        syllable_cursor = 0
        for token in tokens:
            if not self._WORD_RE.match(token):
                output_parts.append(token)
                continue

            lexical_pattern, _, num_syllables, _ = self.get_word_data(token)
            if lexical_pattern is None:
                # Unknown words consume no syllables, mirroring analyze_line.
                output_parts.append(f"<{token}>")
                continue

            word_stress_segment = final_stress[syllable_cursor:syllable_cursor + num_syllables]
            syllable_cursor += num_syllables

            if format_type == 'plus':
                output_parts.append(f"+{token}" if '1' in word_stress_segment else token)
            else:  # 'accent'
                output_parts.append(self._accent_word(token, word_stress_segment))

        return "".join(output_parts).strip()

    def _accent_word(self, word, stress_segment):
        """Accent the first letter of each vowel group that lines up with a stressed syllable."""
        letters = list(word)
        # Heuristic: the i-th run of written vowels stands in for the i-th syllable.
        vowel_groups = list(re.finditer(r'[aeiouy]+', word, re.IGNORECASE))
        for stress_char, group in zip(stress_segment, vowel_groups):
            if stress_char == '1':
                pos = group.start()
                letters[pos] = self.ACCENT_MAP.get(letters[pos], letters[pos])
        return "".join(letters)
138
+
139
class ScansionApp:
    """Tkinter front end: a poem input box, an Analyze button and a colour-coded report."""

    def __init__(self, root, dict_path="english_phonetic_dict.pkl"):
        """Build the widgets on *root* and load the pronunciation dictionary.

        Args:
            root: the Tk root window that hosts the application.
            dict_path: path of the pickled pronunciation dictionary; the
                default is resolved against the current working directory.

        If the dictionary cannot be loaded, an error dialog is shown and the
        root window is destroyed; ``self.scanner`` is then ``None``.
        """
        self.root = root
        self.root.title("English Poetry Scansion Tool")
        self.root.geometry("1000x900")
        self.VOWELS = "aeiouyAEIOUY"
        self.scanner = None

        self.style = ttk.Style()
        # Prefer the 'aqua' theme; fall back where Tk does not provide it.
        try:
            self.style.theme_use('aqua')
        except tk.TclError:
            self.style.theme_use('default')
        self.style.configure('TFrame', background='#ECECEC')
        self.style.configure('TButton', font=('Helvetica Neue', 13, 'bold'), padding=10)
        self.style.configure('TLabel', font=('Helvetica Neue', 14), background='#ECECEC')
        self.style.configure('Header.TLabel', font=('Helvetica Neue', 18, 'bold'))

        self.main_frame = ttk.Frame(root, padding="20")
        self.main_frame.pack(fill=tk.BOTH, expand=True)

        ttk.Label(self.main_frame, text="Poetry Scansion Analyzer", style='Header.TLabel').pack(pady=(0, 20))
        self.input_text = scrolledtext.ScrolledText(self.main_frame, height=8, font=("Menlo", 12), wrap=tk.WORD)
        self.input_text.pack(fill=tk.BOTH, expand=True, pady=5)
        self.input_text.insert(tk.END, "But I will ignite\nIn your eyes a fire\nNow I give you strength\nNow I give you power")

        ttk.Button(self.main_frame, text="Analyze Poem", command=self.perform_scan).pack(pady=15)

        self.output_text = tk.Text(self.main_frame, font=("Menlo", 12), wrap=tk.WORD, background="black", relief=tk.SOLID, borderwidth=1, padx=10, pady=10)
        self.output_text.pack(fill=tk.BOTH, expand=True, pady=5)

        # Colour tags used by render_color_line and the report text.
        self.output_text.tag_configure('vowel', foreground="#9370DB")
        self.output_text.tag_configure('stressed_vowel', foreground="#FF474C", font=("Menlo", 12, "bold"))
        self.output_text.tag_configure('consonant', foreground="#FFC700")
        self.output_text.tag_configure('punctuation', foreground="#32CD32")
        self.output_text.tag_configure('info', foreground="white")
        self.output_text.tag_configure('separator', foreground="#555555")
        self.output_text.tag_configure('header', foreground="white", font=("Menlo", 14, "bold", "underline"), justify='center')

        # The report is read-only except while perform_scan is writing to it.
        self.output_text.config(state=tk.DISABLED)

        # Top-level boundary: any load failure is reported to the user
        # instead of surfacing as a traceback.
        try:
            self.scanner = EnglishScansion(dict_path=dict_path)
        except Exception as e:
            messagebox.showerror("Fatal Error", str(e))
            self.root.destroy()

    def perform_scan(self):
        """Analyse the poem in the input box and rewrite the report widget.

        For every non-blank line the report shows the '+'-marked line, the
        accented line, the ARPABET transcription and the meter statistics,
        followed by two whole-poem summaries (accented text and IPA). Blank
        lines are kept as stanza breaks in the report and in both summaries.
        """
        poem_text = self.input_text.get("1.0", tk.END)
        if not poem_text.strip():
            return

        self.output_text.config(state=tk.NORMAL)
        try:
            self.output_text.delete("1.0", tk.END)
            accented_poem_lines, ipa_poem_lines = [], []

            for line in poem_text.strip().split('\n'):
                if not line.strip():
                    self.output_text.insert(tk.END, '\n')
                    accented_poem_lines.append("")
                    ipa_poem_lines.append("")
                    continue

                analysis = self.scanner.analyze_line(line)

                accented_line = self.scanner.format_line(line, analysis, 'accent')
                accented_poem_lines.append(accented_line)
                ipa_poem_lines.append(self.scanner.format_line(line, analysis, 'ipa'))

                self.render_color_line(self.scanner.format_line(line, analysis, 'plus') + "\n")
                self.render_color_line(accented_line + "\n")
                self.output_text.insert(tk.END, self.scanner.format_line(line, analysis, 'arpabet') + "\n", 'info')

                info_str = f"Meter: {analysis['meter']} | Syllables: {analysis['syllables']} | Mismatch Score: {analysis['score']:.2f}\n"
                self.output_text.insert(tk.END, info_str, 'info')
                self.output_text.insert(tk.END, "-" * 70 + "\n", 'separator')

            # Render summaries.
            self.output_text.insert(tk.END, "\n\n")
            self.output_text.insert(tk.END, "Stressed Poem Summary\n", 'header')
            self.output_text.insert(tk.END, "\n")
            for line in accented_poem_lines:
                self.render_color_line(line + "\n")

            self.output_text.insert(tk.END, "\n\n")
            self.output_text.insert(tk.END, "IPA Phonetic Transcription\n", 'header')
            self.output_text.insert(tk.END, "\n")
            for line in ipa_poem_lines:
                self.output_text.insert(tk.END, line + "\n", 'info')
        finally:
            # Re-lock the widget even if analysis raised part-way through.
            self.output_text.config(state=tk.DISABLED)

    def _tag_for(self, char):
        """Return the colour tag name for a single character."""
        # Accented vowels are alphabetic too, so they must be tested first.
        if char in "áéíóúýÁÉÍÓÚÝ":
            return 'stressed_vowel'
        if char in self.VOWELS:
            return 'vowel'
        if char.isalpha():
            return 'consonant'
        if char in "<>":
            return 'separator'
        if char in ".,;:!?'\"":
            return 'punctuation'
        if char == '+':
            return 'stressed_vowel'
        return 'info'

    def render_color_line(self, text):
        """Append *text* to the report, colouring each character by its class.

        Consecutive characters that share a tag are inserted together, which
        produces the same tagged text with far fewer Tk calls.
        """
        run_chars, run_tag = [], None
        for char in text:
            tag = self._tag_for(char)
            if run_chars and tag != run_tag:
                self.output_text.insert(tk.END, "".join(run_chars), run_tag)
                run_chars = []
            run_tag = tag
            run_chars.append(char)
        if run_chars:
            self.output_text.insert(tk.END, "".join(run_chars), run_tag)
237
+
238
def main():
    """Create the Tk root window, attach the application and run the event loop."""
    window = tk.Tk()
    # Keep a reference to the app for the lifetime of the event loop.
    application = ScansionApp(window)
    window.mainloop()


if __name__ == "__main__":
    main()