Hindi Support
Browse files- proxies.txt +300 -0
- speech_utils.py +41 -6
proxies.txt
ADDED
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
158.255.77.168:80
|
2 |
+
37.120.172.84:80
|
3 |
+
50.223.246.237:80
|
4 |
+
50.174.7.159:80
|
5 |
+
50.207.199.87:80
|
6 |
+
32.223.6.94:80
|
7 |
+
89.58.53.205:80
|
8 |
+
50.207.199.80:80
|
9 |
+
50.207.199.83:80
|
10 |
+
50.174.7.153:80
|
11 |
+
165.232.129.150:80
|
12 |
+
50.239.72.18:80
|
13 |
+
50.175.212.66:80
|
14 |
+
50.217.226.47:80
|
15 |
+
50.239.72.16:80
|
16 |
+
50.221.74.130:80
|
17 |
+
190.58.248.86:80
|
18 |
+
50.207.199.82:80
|
19 |
+
50.174.7.152:80
|
20 |
+
50.122.86.118:80
|
21 |
+
66.191.31.158:80
|
22 |
+
89.58.45.248:80
|
23 |
+
89.58.28.110:80
|
24 |
+
202.61.199.166:80
|
25 |
+
89.58.55.106:80
|
26 |
+
23.247.136.254:80
|
27 |
+
3.110.60.103:80
|
28 |
+
89.58.52.160:80
|
29 |
+
54.174.151.201:80
|
30 |
+
37.19.208.90:8080
|
31 |
+
50.217.226.43:80
|
32 |
+
50.239.72.17:80
|
33 |
+
50.174.7.157:80
|
34 |
+
50.217.226.44:80
|
35 |
+
50.174.7.158:80
|
36 |
+
50.217.226.42:80
|
37 |
+
50.174.7.155:80
|
38 |
+
50.174.7.162:80
|
39 |
+
213.143.113.82:80
|
40 |
+
50.221.230.186:80
|
41 |
+
50.217.226.41:80
|
42 |
+
50.207.199.86:80
|
43 |
+
0.0.0.0:80
|
44 |
+
211.128.96.206:80
|
45 |
+
68.185.57.66:80
|
46 |
+
50.174.7.156:80
|
47 |
+
50.207.199.81:80
|
48 |
+
127.0.0.7:80
|
49 |
+
99.79.124.70:80
|
50 |
+
109.94.208.128:8888
|
51 |
+
40.76.69.94:8080
|
52 |
+
162.223.90.150:80
|
53 |
+
66.201.7.151:3128
|
54 |
+
3.26.71.155:80
|
55 |
+
47.251.122.81:8888
|
56 |
+
50.202.75.26:80
|
57 |
+
50.239.72.19:80
|
58 |
+
50.175.212.74:80
|
59 |
+
188.68.52.244:80
|
60 |
+
193.30.122.197:80
|
61 |
+
89.58.8.250:80
|
62 |
+
95.216.148.196:80
|
63 |
+
23.247.136.248:80
|
64 |
+
173.212.208.108:2082
|
65 |
+
45.140.143.77:18080
|
66 |
+
200.250.131.218:80
|
67 |
+
89.58.55.33:80
|
68 |
+
47.88.59.79:82
|
69 |
+
43.130.47.134:8080
|
70 |
+
119.156.195.173:3128
|
71 |
+
159.69.57.20:8880
|
72 |
+
50.231.104.58:80
|
73 |
+
8.217.124.178:49440
|
74 |
+
213.148.10.80:3128
|
75 |
+
103.227.187.1:6080
|
76 |
+
45.123.142.11:8181
|
77 |
+
38.159.229.97:999
|
78 |
+
186.96.160.202:999
|
79 |
+
139.59.1.14:80
|
80 |
+
129.226.155.235:8080
|
81 |
+
219.65.73.81:80
|
82 |
+
66.29.154.103:3128
|
83 |
+
50.217.226.40:80
|
84 |
+
81.169.213.169:8888
|
85 |
+
41.59.90.171:80
|
86 |
+
84.39.112.144:3128
|
87 |
+
47.56.110.204:8989
|
88 |
+
128.140.113.110:3128
|
89 |
+
47.238.149.53:8888
|
90 |
+
159.65.245.255:80
|
91 |
+
198.49.68.80:80
|
92 |
+
88.99.209.189:1234
|
93 |
+
159.203.61.169:3128
|
94 |
+
34.143.143.61:7777
|
95 |
+
134.209.29.120:80
|
96 |
+
103.249.120.167:80
|
97 |
+
44.220.205.79:8080
|
98 |
+
103.75.119.185:80
|
99 |
+
198.199.86.11:8080
|
100 |
+
51.159.98.163:8089
|
101 |
+
70.153.208.157:8080
|
102 |
+
57.128.37.47:3128
|
103 |
+
181.119.86.186:999
|
104 |
+
47.91.115.179:8081
|
105 |
+
149.129.255.179:80
|
106 |
+
162.243.149.86:31028
|
107 |
+
172.188.122.92:80
|
108 |
+
172.167.161.8:8080
|
109 |
+
171.248.200.229:10001
|
110 |
+
103.214.102.172:8083
|
111 |
+
103.125.174.5:8080
|
112 |
+
103.48.71.162:82
|
113 |
+
186.190.225.152:999
|
114 |
+
171.22.8.192:80
|
115 |
+
47.251.87.199:1036
|
116 |
+
5.45.126.128:8080
|
117 |
+
158.255.77.169:80
|
118 |
+
72.10.164.178:11517
|
119 |
+
104.223.103.218:3428
|
120 |
+
137.184.174.32:4857
|
121 |
+
200.174.198.86:8888
|
122 |
+
41.59.90.175:80
|
123 |
+
65.38.97.146:3128
|
124 |
+
155.94.128.59:10809
|
125 |
+
198.74.51.79:8888
|
126 |
+
103.82.134.199:8888
|
127 |
+
161.35.70.249:8080
|
128 |
+
47.236.224.32:8080
|
129 |
+
78.108.38.82:8118
|
130 |
+
87.248.129.26:80
|
131 |
+
213.149.184.6:23128
|
132 |
+
178.208.168.81:8118
|
133 |
+
87.106.103.177:3128
|
134 |
+
156.67.28.21:8080
|
135 |
+
85.215.64.49:80
|
136 |
+
192.73.244.36:80
|
137 |
+
144.126.216.57:80
|
138 |
+
23.82.137.157:80
|
139 |
+
143.42.66.91:80
|
140 |
+
185.94.35.26:8085
|
141 |
+
193.233.211.141:8085
|
142 |
+
146.19.44.108:8085
|
143 |
+
193.233.231.194:8085
|
144 |
+
104.239.13.32:6661
|
145 |
+
23.27.75.71:6151
|
146 |
+
31.57.82.108:6689
|
147 |
+
185.226.204.95:5648
|
148 |
+
64.49.37.158:8085
|
149 |
+
166.88.58.222:5947
|
150 |
+
193.233.140.72:8085
|
151 |
+
198.105.122.193:6766
|
152 |
+
185.216.106.213:6290
|
153 |
+
92.113.241.107:6192
|
154 |
+
185.68.152.228:8085
|
155 |
+
162.220.247.170:6765
|
156 |
+
185.226.207.17:5566
|
157 |
+
173.239.219.223:6132
|
158 |
+
83.97.119.200:8085
|
159 |
+
31.58.30.194:6776
|
160 |
+
199.96.166.148:8085
|
161 |
+
64.49.38.215:8085
|
162 |
+
193.233.82.203:8085
|
163 |
+
142.111.113.15:6376
|
164 |
+
185.94.32.47:8085
|
165 |
+
185.226.207.166:5715
|
166 |
+
193.233.229.204:8085
|
167 |
+
64.49.36.46:8085
|
168 |
+
91.243.91.132:8085
|
169 |
+
185.68.152.178:8085
|
170 |
+
140.235.2.65:8085
|
171 |
+
91.243.90.205:8085
|
172 |
+
206.206.69.5:6269
|
173 |
+
107.181.154.141:5819
|
174 |
+
89.249.197.127:6713
|
175 |
+
185.77.223.190:8085
|
176 |
+
88.218.46.93:8085
|
177 |
+
86.38.154.231:5874
|
178 |
+
140.235.3.77:8085
|
179 |
+
45.10.165.133:8085
|
180 |
+
140.235.2.33:8085
|
181 |
+
193.233.230.91:8085
|
182 |
+
193.233.143.130:8085
|
183 |
+
185.226.204.189:5742
|
184 |
+
140.235.0.67:8085
|
185 |
+
104.143.226.241:5844
|
186 |
+
45.66.209.43:8085
|
187 |
+
45.43.70.178:6465
|
188 |
+
212.119.41.227:8085
|
189 |
+
84.46.204.43:6346
|
190 |
+
86.38.236.54:6338
|
191 |
+
194.110.150.96:8085
|
192 |
+
141.98.85.122:8085
|
193 |
+
45.43.64.110:6368
|
194 |
+
185.88.101.15:8085
|
195 |
+
198.105.122.86:6659
|
196 |
+
89.116.78.214:5825
|
197 |
+
212.18.113.217:8085
|
198 |
+
45.132.185.241:8085
|
199 |
+
199.96.165.117:8085
|
200 |
+
91.243.91.201:8085
|
201 |
+
5.183.253.227:8085
|
202 |
+
193.233.210.24:8085
|
203 |
+
91.243.94.11:8085
|
204 |
+
104.143.224.11:5872
|
205 |
+
45.145.129.183:8085
|
206 |
+
142.111.58.110:6688
|
207 |
+
104.239.13.130:6759
|
208 |
+
166.88.58.147:5872
|
209 |
+
89.116.78.197:5808
|
210 |
+
185.88.101.237:8085
|
211 |
+
193.233.83.162:8085
|
212 |
+
67.227.112.62:6102
|
213 |
+
64.49.36.235:8085
|
214 |
+
193.202.16.125:8085
|
215 |
+
193.233.143.85:8085
|
216 |
+
199.96.164.171:8085
|
217 |
+
193.93.192.235:8085
|
218 |
+
91.243.89.75:8085
|
219 |
+
198.105.122.50:6623
|
220 |
+
185.226.204.70:5623
|
221 |
+
146.103.3.72:7125
|
222 |
+
142.147.131.230:6130
|
223 |
+
85.239.56.246:8085
|
224 |
+
45.80.107.31:8085
|
225 |
+
91.243.90.155:8085
|
226 |
+
140.235.2.45:8085
|
227 |
+
62.204.35.110:8085
|
228 |
+
176.126.104.184:8085
|
229 |
+
193.233.231.67:8085
|
230 |
+
104.239.13.2:6631
|
231 |
+
45.145.128.89:8085
|
232 |
+
193.233.140.247:8085
|
233 |
+
91.243.95.66:8085
|
234 |
+
88.218.46.235:8085
|
235 |
+
85.239.58.173:8085
|
236 |
+
185.68.155.103:8085
|
237 |
+
193.163.207.13:8085
|
238 |
+
193.233.138.76:8085
|
239 |
+
193.233.231.224:8085
|
240 |
+
31.57.41.125:5701
|
241 |
+
89.116.78.80:5691
|
242 |
+
91.243.90.44:8085
|
243 |
+
45.80.107.196:8085
|
244 |
+
45.145.129.47:8085
|
245 |
+
64.49.37.221:8085
|
246 |
+
104.143.226.20:5623
|
247 |
+
193.233.248.99:8085
|
248 |
+
45.43.81.114:5761
|
249 |
+
193.233.143.227:8085
|
250 |
+
104.239.13.55:6684
|
251 |
+
86.38.26.123:6288
|
252 |
+
181.41.218.64:5650
|
253 |
+
146.103.3.121:7174
|
254 |
+
31.57.41.93:5669
|
255 |
+
91.243.89.113:8085
|
256 |
+
193.233.137.57:8085
|
257 |
+
199.96.165.241:8085
|
258 |
+
193.93.193.130:8085
|
259 |
+
193.93.195.130:8085
|
260 |
+
84.46.204.223:6526
|
261 |
+
167.253.49.95:8085
|
262 |
+
199.96.165.212:8085
|
263 |
+
193.233.143.60:8085
|
264 |
+
146.103.3.131:7184
|
265 |
+
45.43.70.36:6323
|
266 |
+
170.106.184.175:10212
|
267 |
+
62.204.35.227:8085
|
268 |
+
31.57.82.187:6768
|
269 |
+
140.235.1.169:8085
|
270 |
+
193.233.82.20:8085
|
271 |
+
193.233.210.251:8085
|
272 |
+
154.30.252.153:5284
|
273 |
+
45.43.81.153:5800
|
274 |
+
193.202.16.168:8085
|
275 |
+
217.69.121.57:5722
|
276 |
+
146.185.202.177:8085
|
277 |
+
193.233.228.182:8085
|
278 |
+
193.233.143.18:8085
|
279 |
+
104.239.13.22:6651
|
280 |
+
92.113.241.55:6140
|
281 |
+
167.253.48.251:8085
|
282 |
+
199.96.167.253:8085
|
283 |
+
64.49.36.201:8085
|
284 |
+
45.159.23.254:8085
|
285 |
+
31.57.82.213:6794
|
286 |
+
198.105.111.242:6920
|
287 |
+
198.46.241.80:6615
|
288 |
+
146.185.203.80:8085
|
289 |
+
185.226.204.51:5604
|
290 |
+
104.238.10.77:6023
|
291 |
+
199.96.165.131:8085
|
292 |
+
193.233.231.186:8085
|
293 |
+
69.30.217.114:17016
|
294 |
+
198.46.241.96:6631
|
295 |
+
140.235.1.233:8085
|
296 |
+
31.57.41.42:5618
|
297 |
+
107.181.154.72:5750
|
298 |
+
31.57.41.193:5769
|
299 |
+
64.49.38.107:8085
|
300 |
+
146.103.3.167:7220
|
speech_utils.py
CHANGED
@@ -10,7 +10,26 @@ import asyncio # Import asyncio for await
|
|
10 |
# Configure logging
|
11 |
logging.basicConfig(level=logging.INFO)
|
12 |
logger = logging.getLogger(__name__)
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
# Define supported languages (using short codes consistent with Whisper/googletrans)
|
15 |
# Note: googletrans uses short codes like 'en', 'hi'. Whisper also detects these.
|
16 |
SUPPORTED_LANGUAGES = {
|
@@ -46,7 +65,7 @@ except Exception as e:
|
|
46 |
model = None
|
47 |
|
48 |
# Initialize the translator
|
49 |
-
translator =
|
50 |
|
51 |
async def process_audio(audio_file_content: bytes, lang1: str, lang2: str):
|
52 |
"""
|
@@ -117,10 +136,26 @@ async def process_audio(audio_file_content: bytes, lang1: str, lang2: str):
|
|
117 |
return {"error": f"Detected language '{detected_lang_code}' was not one of the expected languages: {lang1} or {lang2}."}
|
118 |
|
119 |
# --- Transcription ---
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
# Determine the target language for translation
|
126 |
target_lang = lang2 if detected_lang_code == lang1 else lang1
|
|
|
10 |
# Configure logging
|
11 |
logging.basicConfig(level=logging.INFO)
|
12 |
logger = logging.getLogger(__name__)
|
13 |
+
import random
|
14 |
+
|
15 |
+
def get_random_proxy():
    """Pick one entry at random from the ``proxies.txt`` file next to this module.

    Blank lines are skipped and surrounding whitespace is stripped from each
    entry. The entries are returned verbatim otherwise (no validation of the
    address or port is performed here).

    Returns:
        One stripped, non-blank line from the file, or ``None`` when the file
        is missing, cannot be read/decoded, or contains no usable lines.
    """
    proxies_path = os.path.join(os.path.dirname(__file__), "proxies.txt")
    try:
        with open(proxies_path, "r", encoding="utf-8") as f:
            proxies = [line.strip() for line in f if line.strip()]
    except (OSError, UnicodeDecodeError) as exc:
        # This function is reached during module import (the translator is
        # built at module level), so a missing or unreadable list must not
        # raise: fall back to the same "no proxy" signal as an empty file.
        logger.warning("Could not read proxy list %s: %s", proxies_path, exc)
        return None
    if not proxies:
        return None
    return random.choice(proxies)
|
22 |
+
|
23 |
+
def build_translator_with_proxy():
    """Create a ``Translator`` bound to ``translate.googleapis.com``.

    A proxy entry is requested from ``get_random_proxy()``. When one is
    returned it is prefixed with ``http://`` and passed for both the
    ``"http"`` and ``"https"`` keys of the ``proxies`` argument; otherwise the
    translator is created without a ``proxies`` argument.

    Returns:
        The constructed ``Translator`` instance.
    """
    translator_kwargs = {"service_urls": ['translate.googleapis.com']}

    chosen = get_random_proxy()
    if chosen:
        endpoint = f"http://{chosen}"
        # NOTE(review): assumes the installed Translator accepts a `proxies`
        # mapping keyed by "http"/"https" -- confirm against the pinned version.
        translator_kwargs["proxies"] = {"http": endpoint, "https": endpoint}

    return Translator(**translator_kwargs)
|
33 |
# Define supported languages (using short codes consistent with Whisper/googletrans)
|
34 |
# Note: googletrans uses short codes like 'en', 'hi'. Whisper also detects these.
|
35 |
SUPPORTED_LANGUAGES = {
|
|
|
65 |
model = None
|
66 |
|
67 |
# Initialize the translator
|
68 |
+
translator = build_translator_with_proxy()
|
69 |
|
70 |
async def process_audio(audio_file_content: bytes, lang1: str, lang2: str):
|
71 |
"""
|
|
|
136 |
return {"error": f"Detected language '{detected_lang_code}' was not one of the expected languages: {lang1} or {lang2}."}
|
137 |
|
138 |
# --- Transcription ---
|
139 |
+
# Force Hindi transcription if detected language is Hindi
|
140 |
+
if detected_lang_code == "hi":
|
141 |
+
options = whisper.DecodingOptions(language="hi", fp16=(device=="cuda"))
|
142 |
+
result = whisper.decode(model, mel, options)
|
143 |
+
transcribed_text = result.text
|
144 |
+
logger.info(f"Transcription (forced Hindi): {transcribed_text}")
|
145 |
+
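# If the output is mostly Latin script, decode again with Hindi forced. NOTE(review): these options match the first attempt apart from an explicit task="transcribe" (which appears to be Whisper's default), so the retry likely reproduces the same text - confirm.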
|
146 |
+
latin_count = sum('a' <= c.lower() <= 'z' for c in transcribed_text)
|
147 |
+
devanagari_count = sum('\u0900' <= c <= '\u097F' for c in transcribed_text)
|
148 |
+
if latin_count > devanagari_count:
|
149 |
+
logger.info("Transcription appears to be in Latin script, retrying with forced Hindi language.")
|
150 |
+
options = whisper.DecodingOptions(language="hi", fp16=(device=="cuda"), task="transcribe")
|
151 |
+
result = whisper.decode(model, mel, options)
|
152 |
+
transcribed_text = result.text
|
153 |
+
logger.info(f"Retried Hindi transcription: {transcribed_text}")
|
154 |
+
else:
|
155 |
+
options = whisper.DecodingOptions(language=detected_lang_code, fp16=(device=="cuda"))
|
156 |
+
result = whisper.decode(model, mel, options)
|
157 |
+
transcribed_text = result.text
|
158 |
+
logger.info(f"Transcription: {transcribed_text}")
|
159 |
|
160 |
# Determine the target language for translation
|
161 |
target_lang = lang2 if detected_lang_code == lang1 else lang1
|