MonilM committed on
Commit
1b14dcd
·
1 Parent(s): e00c066

Hindi Support

Browse files
Files changed (2) hide show
  1. proxies.txt +300 -0
  2. speech_utils.py +41 -6
proxies.txt ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 158.255.77.168:80
2
+ 37.120.172.84:80
3
+ 50.223.246.237:80
4
+ 50.174.7.159:80
5
+ 50.207.199.87:80
6
+ 32.223.6.94:80
7
+ 89.58.53.205:80
8
+ 50.207.199.80:80
9
+ 50.207.199.83:80
10
+ 50.174.7.153:80
11
+ 165.232.129.150:80
12
+ 50.239.72.18:80
13
+ 50.175.212.66:80
14
+ 50.217.226.47:80
15
+ 50.239.72.16:80
16
+ 50.221.74.130:80
17
+ 190.58.248.86:80
18
+ 50.207.199.82:80
19
+ 50.174.7.152:80
20
+ 50.122.86.118:80
21
+ 66.191.31.158:80
22
+ 89.58.45.248:80
23
+ 89.58.28.110:80
24
+ 202.61.199.166:80
25
+ 89.58.55.106:80
26
+ 23.247.136.254:80
27
+ 3.110.60.103:80
28
+ 89.58.52.160:80
29
+ 54.174.151.201:80
30
+ 37.19.208.90:8080
31
+ 50.217.226.43:80
32
+ 50.239.72.17:80
33
+ 50.174.7.157:80
34
+ 50.217.226.44:80
35
+ 50.174.7.158:80
36
+ 50.217.226.42:80
37
+ 50.174.7.155:80
38
+ 50.174.7.162:80
39
+ 213.143.113.82:80
40
+ 50.221.230.186:80
41
+ 50.217.226.41:80
42
+ 50.207.199.86:80
43
+ 0.0.0.0:80
44
+ 211.128.96.206:80
45
+ 68.185.57.66:80
46
+ 50.174.7.156:80
47
+ 50.207.199.81:80
48
+ 127.0.0.7:80
49
+ 99.79.124.70:80
50
+ 109.94.208.128:8888
51
+ 40.76.69.94:8080
52
+ 162.223.90.150:80
53
+ 66.201.7.151:3128
54
+ 3.26.71.155:80
55
+ 47.251.122.81:8888
56
+ 50.202.75.26:80
57
+ 50.239.72.19:80
58
+ 50.175.212.74:80
59
+ 188.68.52.244:80
60
+ 193.30.122.197:80
61
+ 89.58.8.250:80
62
+ 95.216.148.196:80
63
+ 23.247.136.248:80
64
+ 173.212.208.108:2082
65
+ 45.140.143.77:18080
66
+ 200.250.131.218:80
67
+ 89.58.55.33:80
68
+ 47.88.59.79:82
69
+ 43.130.47.134:8080
70
+ 119.156.195.173:3128
71
+ 159.69.57.20:8880
72
+ 50.231.104.58:80
73
+ 8.217.124.178:49440
74
+ 213.148.10.80:3128
75
+ 103.227.187.1:6080
76
+ 45.123.142.11:8181
77
+ 38.159.229.97:999
78
+ 186.96.160.202:999
79
+ 139.59.1.14:80
80
+ 129.226.155.235:8080
81
+ 219.65.73.81:80
82
+ 66.29.154.103:3128
83
+ 50.217.226.40:80
84
+ 81.169.213.169:8888
85
+ 41.59.90.171:80
86
+ 84.39.112.144:3128
87
+ 47.56.110.204:8989
88
+ 128.140.113.110:3128
89
+ 47.238.149.53:8888
90
+ 159.65.245.255:80
91
+ 198.49.68.80:80
92
+ 88.99.209.189:1234
93
+ 159.203.61.169:3128
94
+ 34.143.143.61:7777
95
+ 134.209.29.120:80
96
+ 103.249.120.167:80
97
+ 44.220.205.79:8080
98
+ 103.75.119.185:80
99
+ 198.199.86.11:8080
100
+ 51.159.98.163:8089
101
+ 70.153.208.157:8080
102
+ 57.128.37.47:3128
103
+ 181.119.86.186:999
104
+ 47.91.115.179:8081
105
+ 149.129.255.179:80
106
+ 162.243.149.86:31028
107
+ 172.188.122.92:80
108
+ 172.167.161.8:8080
109
+ 171.248.200.229:10001
110
+ 103.214.102.172:8083
111
+ 103.125.174.5:8080
112
+ 103.48.71.162:82
113
+ 186.190.225.152:999
114
+ 171.22.8.192:80
115
+ 47.251.87.199:1036
116
+ 5.45.126.128:8080
117
+ 158.255.77.169:80
118
+ 72.10.164.178:11517
119
+ 104.223.103.218:3428
120
+ 137.184.174.32:4857
121
+ 200.174.198.86:8888
122
+ 41.59.90.175:80
123
+ 65.38.97.146:3128
124
+ 155.94.128.59:10809
125
+ 198.74.51.79:8888
126
+ 103.82.134.199:8888
127
+ 161.35.70.249:8080
128
+ 47.236.224.32:8080
129
+ 78.108.38.82:8118
130
+ 87.248.129.26:80
131
+ 213.149.184.6:23128
132
+ 178.208.168.81:8118
133
+ 87.106.103.177:3128
134
+ 156.67.28.21:8080
135
+ 85.215.64.49:80
136
+ 192.73.244.36:80
137
+ 144.126.216.57:80
138
+ 23.82.137.157:80
139
+ 143.42.66.91:80
140
+ 185.94.35.26:8085
141
+ 193.233.211.141:8085
142
+ 146.19.44.108:8085
143
+ 193.233.231.194:8085
144
+ 104.239.13.32:6661
145
+ 23.27.75.71:6151
146
+ 31.57.82.108:6689
147
+ 185.226.204.95:5648
148
+ 64.49.37.158:8085
149
+ 166.88.58.222:5947
150
+ 193.233.140.72:8085
151
+ 198.105.122.193:6766
152
+ 185.216.106.213:6290
153
+ 92.113.241.107:6192
154
+ 185.68.152.228:8085
155
+ 162.220.247.170:6765
156
+ 185.226.207.17:5566
157
+ 173.239.219.223:6132
158
+ 83.97.119.200:8085
159
+ 31.58.30.194:6776
160
+ 199.96.166.148:8085
161
+ 64.49.38.215:8085
162
+ 193.233.82.203:8085
163
+ 142.111.113.15:6376
164
+ 185.94.32.47:8085
165
+ 185.226.207.166:5715
166
+ 193.233.229.204:8085
167
+ 64.49.36.46:8085
168
+ 91.243.91.132:8085
169
+ 185.68.152.178:8085
170
+ 140.235.2.65:8085
171
+ 91.243.90.205:8085
172
+ 206.206.69.5:6269
173
+ 107.181.154.141:5819
174
+ 89.249.197.127:6713
175
+ 185.77.223.190:8085
176
+ 88.218.46.93:8085
177
+ 86.38.154.231:5874
178
+ 140.235.3.77:8085
179
+ 45.10.165.133:8085
180
+ 140.235.2.33:8085
181
+ 193.233.230.91:8085
182
+ 193.233.143.130:8085
183
+ 185.226.204.189:5742
184
+ 140.235.0.67:8085
185
+ 104.143.226.241:5844
186
+ 45.66.209.43:8085
187
+ 45.43.70.178:6465
188
+ 212.119.41.227:8085
189
+ 84.46.204.43:6346
190
+ 86.38.236.54:6338
191
+ 194.110.150.96:8085
192
+ 141.98.85.122:8085
193
+ 45.43.64.110:6368
194
+ 185.88.101.15:8085
195
+ 198.105.122.86:6659
196
+ 89.116.78.214:5825
197
+ 212.18.113.217:8085
198
+ 45.132.185.241:8085
199
+ 199.96.165.117:8085
200
+ 91.243.91.201:8085
201
+ 5.183.253.227:8085
202
+ 193.233.210.24:8085
203
+ 91.243.94.11:8085
204
+ 104.143.224.11:5872
205
+ 45.145.129.183:8085
206
+ 142.111.58.110:6688
207
+ 104.239.13.130:6759
208
+ 166.88.58.147:5872
209
+ 89.116.78.197:5808
210
+ 185.88.101.237:8085
211
+ 193.233.83.162:8085
212
+ 67.227.112.62:6102
213
+ 64.49.36.235:8085
214
+ 193.202.16.125:8085
215
+ 193.233.143.85:8085
216
+ 199.96.164.171:8085
217
+ 193.93.192.235:8085
218
+ 91.243.89.75:8085
219
+ 198.105.122.50:6623
220
+ 185.226.204.70:5623
221
+ 146.103.3.72:7125
222
+ 142.147.131.230:6130
223
+ 85.239.56.246:8085
224
+ 45.80.107.31:8085
225
+ 91.243.90.155:8085
226
+ 140.235.2.45:8085
227
+ 62.204.35.110:8085
228
+ 176.126.104.184:8085
229
+ 193.233.231.67:8085
230
+ 104.239.13.2:6631
231
+ 45.145.128.89:8085
232
+ 193.233.140.247:8085
233
+ 91.243.95.66:8085
234
+ 88.218.46.235:8085
235
+ 85.239.58.173:8085
236
+ 185.68.155.103:8085
237
+ 193.163.207.13:8085
238
+ 193.233.138.76:8085
239
+ 193.233.231.224:8085
240
+ 31.57.41.125:5701
241
+ 89.116.78.80:5691
242
+ 91.243.90.44:8085
243
+ 45.80.107.196:8085
244
+ 45.145.129.47:8085
245
+ 64.49.37.221:8085
246
+ 104.143.226.20:5623
247
+ 193.233.248.99:8085
248
+ 45.43.81.114:5761
249
+ 193.233.143.227:8085
250
+ 104.239.13.55:6684
251
+ 86.38.26.123:6288
252
+ 181.41.218.64:5650
253
+ 146.103.3.121:7174
254
+ 31.57.41.93:5669
255
+ 91.243.89.113:8085
256
+ 193.233.137.57:8085
257
+ 199.96.165.241:8085
258
+ 193.93.193.130:8085
259
+ 193.93.195.130:8085
260
+ 84.46.204.223:6526
261
+ 167.253.49.95:8085
262
+ 199.96.165.212:8085
263
+ 193.233.143.60:8085
264
+ 146.103.3.131:7184
265
+ 45.43.70.36:6323
266
+ 170.106.184.175:10212
267
+ 62.204.35.227:8085
268
+ 31.57.82.187:6768
269
+ 140.235.1.169:8085
270
+ 193.233.82.20:8085
271
+ 193.233.210.251:8085
272
+ 154.30.252.153:5284
273
+ 45.43.81.153:5800
274
+ 193.202.16.168:8085
275
+ 217.69.121.57:5722
276
+ 146.185.202.177:8085
277
+ 193.233.228.182:8085
278
+ 193.233.143.18:8085
279
+ 104.239.13.22:6651
280
+ 92.113.241.55:6140
281
+ 167.253.48.251:8085
282
+ 199.96.167.253:8085
283
+ 64.49.36.201:8085
284
+ 45.159.23.254:8085
285
+ 31.57.82.213:6794
286
+ 198.105.111.242:6920
287
+ 198.46.241.80:6615
288
+ 146.185.203.80:8085
289
+ 185.226.204.51:5604
290
+ 104.238.10.77:6023
291
+ 199.96.165.131:8085
292
+ 193.233.231.186:8085
293
+ 69.30.217.114:17016
294
+ 198.46.241.96:6631
295
+ 140.235.1.233:8085
296
+ 31.57.41.42:5618
297
+ 107.181.154.72:5750
298
+ 31.57.41.193:5769
299
+ 64.49.38.107:8085
300
+ 146.103.3.167:7220
speech_utils.py CHANGED
@@ -10,7 +10,26 @@ import asyncio # Import asyncio for await
10
  # Configure logging
11
  logging.basicConfig(level=logging.INFO)
12
  logger = logging.getLogger(__name__)
13
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # Define supported languages (using short codes consistent with Whisper/googletrans)
15
  # Note: googletrans uses short codes like 'en', 'hi'. Whisper also detects these.
16
  SUPPORTED_LANGUAGES = {
@@ -46,7 +65,7 @@ except Exception as e:
46
  model = None
47
 
48
  # Initialize the translator
49
- translator = Translator()
50
 
51
  async def process_audio(audio_file_content: bytes, lang1: str, lang2: str):
52
  """
@@ -117,10 +136,26 @@ async def process_audio(audio_file_content: bytes, lang1: str, lang2: str):
117
  return {"error": f"Detected language '{detected_lang_code}' was not one of the expected languages: {lang1} or {lang2}."}
118
 
119
  # --- Transcription ---
120
- options = whisper.DecodingOptions(language=detected_lang_code, fp16=(device=="cuda"))
121
- result = whisper.decode(model, mel, options)
122
- transcribed_text = result.text
123
- logger.info(f"Transcription: {transcribed_text}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  # Determine the target language for translation
126
  target_lang = lang2 if detected_lang_code == lang1 else lang1
 
10
  # Configure logging
11
  logging.basicConfig(level=logging.INFO)
12
  logger = logging.getLogger(__name__)
13
+ import random
14
+
15
def get_random_proxy(proxies_path=None):
    """Return one randomly chosen ``host:port`` entry from the proxy list.

    Args:
        proxies_path: Optional path to the proxy list file (one ``host:port``
            entry per line). Defaults to ``proxies.txt`` located next to this
            module, which is what the original implementation always used.

    Returns:
        A ``"host:port"`` string, or ``None`` when the file cannot be read or
        contains no usable entry.

    Blank lines, ``#`` comment lines, entries without a valid numeric port
    (1-65535), and entries whose host is the unspecified address (0.0.0.0) or
    a loopback address (127.x.x.x / ::1) are skipped, because they can never
    be a working remote proxy.
    """
    # Local import: only this function needs it, and the rest of the file's
    # import block is left untouched.
    import ipaddress

    if proxies_path is None:
        proxies_path = os.path.join(os.path.dirname(__file__), "proxies.txt")

    try:
        with open(proxies_path, "r", encoding="utf-8") as f:
            raw_entries = [line.strip() for line in f]
    except OSError as exc:
        # This function is called while the module is being imported (to build
        # the module-level translator), so a missing/unreadable list must
        # degrade to "no proxy" instead of making the import fail.
        logging.getLogger(__name__).warning(
            "Could not read proxy list %s: %s", proxies_path, exc
        )
        return None

    candidates = []
    for entry in raw_entries:
        if not entry or entry.startswith("#"):
            continue

        # Split on the LAST colon so the host part may itself contain colons.
        host, sep, port = entry.rpartition(":")
        if not sep or not host or not port.isdigit():
            continue
        try:
            port_number = int(port)
        except ValueError:
            continue
        if not 0 < port_number < 65536:
            continue

        try:
            address = ipaddress.ip_address(host)
        except ValueError:
            # Not an IP literal (e.g. a hostname): keep it as-is.
            address = None
        if address is not None and (address.is_unspecified or address.is_loopback):
            continue

        candidates.append(entry)

    if not candidates:
        return None
    return random.choice(candidates)
22
+
23
def build_translator_with_proxy():
    """Create a ``Translator`` bound to ``translate.googleapis.com``.

    A single proxy entry is requested from ``get_random_proxy()``. When one is
    returned, it is turned into an ``http://host:port`` URL and passed to the
    translator for both the ``"http"`` and ``"https"`` keys; otherwise the
    translator is created without any proxy configuration.

    Returns:
        The constructed ``Translator`` instance.
    """
    # NOTE(review): the proxy is picked once per call; the caller decides how
    # long the resulting translator (and therefore that proxy) is reused.
    # NOTE(review): entries presumably come from a public proxy list, so
    # traffic sent through them should be treated as observable - confirm
    # this is acceptable for the translated text.
    service_urls = ['translate.googleapis.com']
    chosen = get_random_proxy()

    # Guard clause: no usable proxy means a plain, direct translator.
    if not chosen:
        return Translator(service_urls=service_urls)

    proxy_url = f"http://{chosen}"
    proxy_map = {"http": proxy_url, "https": proxy_url}
    return Translator(service_urls=service_urls, proxies=proxy_map)
33
  # Define supported languages (using short codes consistent with Whisper/googletrans)
34
  # Note: googletrans uses short codes like 'en', 'hi'. Whisper also detects these.
35
  SUPPORTED_LANGUAGES = {
 
65
  model = None
66
 
67
  # Initialize the translator
68
+ translator = build_translator_with_proxy()
69
 
70
  async def process_audio(audio_file_content: bytes, lang1: str, lang2: str):
71
  """
 
136
  return {"error": f"Detected language '{detected_lang_code}' was not one of the expected languages: {lang1} or {lang2}."}
137
 
138
  # --- Transcription ---
139
+ # Force Hindi transcription if detected language is Hindi
140
+ if detected_lang_code == "hi":
141
+ options = whisper.DecodingOptions(language="hi", fp16=(device=="cuda"))
142
+ result = whisper.decode(model, mel, options)
143
+ transcribed_text = result.text
144
+ logger.info(f"Transcription (forced Hindi): {transcribed_text}")
145
+ # If output is mostly Latin, retry with forced Hindi
146
+ latin_count = sum('a' <= c.lower() <= 'z' for c in transcribed_text)
147
+ devanagari_count = sum('\u0900' <= c <= '\u097F' for c in transcribed_text)
148
+ if latin_count > devanagari_count:
149
+ logger.info("Transcription appears to be in Latin script, retrying with forced Hindi language.")
150
+ options = whisper.DecodingOptions(language="hi", fp16=(device=="cuda"), task="transcribe")
151
+ result = whisper.decode(model, mel, options)
152
+ transcribed_text = result.text
153
+ logger.info(f"Retried Hindi transcription: {transcribed_text}")
154
+ else:
155
+ options = whisper.DecodingOptions(language=detected_lang_code, fp16=(device=="cuda"))
156
+ result = whisper.decode(model, mel, options)
157
+ transcribed_text = result.text
158
+ logger.info(f"Transcription: {transcribed_text}")
159
 
160
  # Determine the target language for translation
161
  target_lang = lang2 if detected_lang_code == lang1 else lang1