HusseinBashir committed on
Commit
66cce84
·
verified ·
1 Parent(s): 0244582

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -1
app.py CHANGED
@@ -38,6 +38,40 @@ shortcut_map = {
38
  "sxp": "saaxiib"
39
  }
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def number_to_words(number):
42
  number = int(number)
43
  if number < 20:
@@ -86,13 +120,19 @@ def normalize_text(text):
86
  text = re.sub(r'(?i)(?<!\w)zamzam(?!\w)', 'samsam', text)
87
 
88
  # ➤ Bedel shortcuts - eray kasta oo qoraalka ku jira beddel
89
- # Ka dhig case-insensitive beddelka
90
  def replace_shortcuts(match):
91
  word = match.group(0).lower()
92
  return shortcut_map.get(word, word)
93
  pattern = re.compile(r'\b(' + '|'.join(re.escape(k) for k in shortcut_map.keys()) + r')\b', re.IGNORECASE)
94
  text = pattern.sub(replace_shortcuts, text)
95
 
 
 
 
 
 
 
 
96
  # ➤ Ka saar tirooyin leh koma iyo tobanle
97
  text = re.sub(r'(\d{1,3})(,\d{3})+', lambda m: m.group(0).replace(",", ""), text)
98
  text = re.sub(r'\.\d+', '', text)
 
38
  "sxp": "saaxiib"
39
  }
40
 
41
+ # Countries dictionary (English to Somali)
42
+ country_map = {
43
+ "somalia": "Soomaaliya",
44
+ "ethiopia": "Itoobiya",
45
+ "kenya": "Kenya",
46
+ "djibouti": "Jabuuti",
47
+ "sudan": "Suudaan",
48
+ "south sudan": "Koonfurta Suudaan",
49
+ "uganda": "Ugaandha",
50
+ "tanzania": "Tansaaniya",
51
+ "egypt": "Masar",
52
+ "libya": "Liibiya",
53
+ "algeria": "Aljeeriya",
54
+ "morocco": "Morooko",
55
+ "tunisia": "Tuniisiya",
56
+ "eritrea": "Eriteriya",
57
+ "malawi": "Malaawi",
58
+ "mozambique": "Mosambiik",
59
+ "zambia": "Sambiya",
60
+ "zimbabwe": "Simbabwe",
61
+ "niger": "Niyjer",
62
+ "nigeria": "Nayjeeriya",
63
+ "united states": "Maraykanka",
64
+ "china": "Shiinaha",
65
+ "india": "Hindiya",
66
+ "russia": "Ruushka",
67
+ "united kingdom": "Boqortooyada Midowday",
68
+ "germany": "Jarmalka",
69
+ "france": "Faransiiska",
70
+ "japan": "Jabaan",
71
+ "canada": "Kanada",
72
+ "australia": "Australia"
73
+ }
74
+
75
  def number_to_words(number):
76
  number = int(number)
77
  if number < 20:
 
120
  text = re.sub(r'(?i)(?<!\w)zamzam(?!\w)', 'samsam', text)
121
 
122
  # ➤ Bedel shortcuts - eray kasta oo qoraalka ku jira beddel
 
123
  def replace_shortcuts(match):
124
  word = match.group(0).lower()
125
  return shortcut_map.get(word, word)
126
  pattern = re.compile(r'\b(' + '|'.join(re.escape(k) for k in shortcut_map.keys()) + r')\b', re.IGNORECASE)
127
  text = pattern.sub(replace_shortcuts, text)
128
 
129
+ # ➤ Bedel magacyada waddamada
130
+ def replace_countries(match):
131
+ word = match.group(0).lower()
132
+ return country_map.get(word, word)
133
+ country_pattern = re.compile(r'\b(' + '|'.join(re.escape(k) for k in country_map.keys()) + r')\b', re.IGNORECASE)
134
+ text = country_pattern.sub(replace_countries, text)
135
+
136
  # ➤ Ka saar tirooyin leh koma iyo tobanle
137
  text = re.sub(r'(\d{1,3})(,\d{3})+', lambda m: m.group(0).replace(",", ""), text)
138
  text = re.sub(r'\.\d+', '', text)