Spaces:

HusseinBashir
/

Somali_tts

Running

HusseinBashir committed 4 days ago

Commit

0244582

verified ·

1 Parent(s): 38b2d32

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -85,6 +85,14 @@ def normalize_text(text):
     # ➤ Beddel ereyga zamzam dhammaan noocyadiisa (bilow, dhex, dhammaad)
     text = re.sub(r'(?i)(?<!\w)zamzam(?!\w)', 'samsam', text)
     # ➤ Ka saar tirooyin leh koma iyo tobanle
     text = re.sub(r'(\d{1,3})(,\d{3})+', lambda m: m.group(0).replace(",", ""), text)
     text = re.sub(r'\.\d+', '', text)
@@ -108,14 +116,12 @@ def normalize_text(text):
     text = text.replace("KH", "qa").replace("Z", "S")
     text = text.replace("SH", "SHa'a").replace("DH", "Dha'a")
-    # ➤ Haddii jumladda ku dhammaato zamzam ama samsam, ku dar " ah"
     if re.search(r'(?i)(zamzam|samsam)[\s\.\,\!\?]*$', text.strip()):
         text += " m"
     return text
 def tts(text):
     paragraphs = text.strip().split("\n")
     audio_list = []

     # ➤ Beddel ereyga zamzam dhammaan noocyadiisa (bilow, dhex, dhammaad)
     text = re.sub(r'(?i)(?<!\w)zamzam(?!\w)', 'samsam', text)
+    # ➤ Bedel shortcuts - eray kasta oo qoraalka ku jira beddel
+    # Ka dhig case-insensitive beddelka
+    def replace_shortcuts(match):
+        word = match.group(0).lower()
+        return shortcut_map.get(word, word)
+    pattern = re.compile(r'\b(' + '|'.join(re.escape(k) for k in shortcut_map.keys()) + r')\b', re.IGNORECASE)
+    text = pattern.sub(replace_shortcuts, text)
     # ➤ Ka saar tirooyin leh koma iyo tobanle
     text = re.sub(r'(\d{1,3})(,\d{3})+', lambda m: m.group(0).replace(",", ""), text)
     text = re.sub(r'\.\d+', '', text)
     text = text.replace("KH", "qa").replace("Z", "S")
     text = text.replace("SH", "SHa'a").replace("DH", "Dha'a")
+    # ➤ Haddii jumladda ku dhammaato zamzam ama samsam, ku dar " m" (silent m)
     if re.search(r'(?i)(zamzam|samsam)[\s\.\,\!\?]*$', text.strip()):
         text += " m"
     return text
 def tts(text):
     paragraphs = text.strip().split("\n")
     audio_list = []