amirgame197 committed on
Commit
fe9d818
·
verified ·
1 Parent(s): 7f88060

Upload 2 files

Browse files
Files changed (2) hide show
  1. app_utils.py +369 -0
  2. vits-piper-fa-ganji.onnx +3 -0
app_utils.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import json
import shutil
import subprocess
import requests
import tarfile
from pathlib import Path
import soundfile as sf
import sherpa_onnx
import numpy as np


# Registry of available Persian TTS voices.
# Each entry is a 4-element list:
#   [0] internal model id
#   [1] download URL — either a Hugging Face repo path (per-file download)
#       or a direct .tar.bz2 archive from the sherpa-onnx releases
#   [2] display name shown to the user (Persian, with an emoji prefix);
#       generate_audio() looks models up by THIS field
#   [3] URL of the original model/project page (attribution)
models = [
    ['mms fa','https://huggingface.co/willwade/mms-tts-multilingual-models-onnx/resolve/main/fas',"🌠 راد",'https://huggingface.co/facebook/mms-tts-fas'],
    ['coqui-vits-female1-karim23657','https://huggingface.co/karim23657/persian-tts-vits/tree/main/persian-tts-female1-vits-coqui',"🌺 نگار",'https://huggingface.co/Kamtera/persian-tts-female1-vits'],
    ['coqui-vits-male1-karim23657','https://huggingface.co/karim23657/persian-tts-vits/tree/main/persian-tts-male1-vits-coqui',"🌟 آرش",'https://huggingface.co/Kamtera/persian-tts-male1-vits'],
    ['coqui-vits-male-karim23657','https://huggingface.co/karim23657/persian-tts-vits/tree/main/male-male-coqui-vits',"🦁 کیان",'https://huggingface.co/Kamtera/persian-tts-male-vits'],
    ['coqui-vits-female-karim23657','https://huggingface.co/karim23657/persian-tts-vits/tree/main/female-female-coqui-vits',"🌷 مهتاب",'https://huggingface.co/Kamtera/persian-tts-female-vits'],
    ['coqui-vits-female-GPTInformal-karim23657','https://huggingface.co/karim23657/persian-tts-vits/tree/main/female-GPTInformal-coqui-vits',"🌼 شیوا",'https://huggingface.co/karim23657/persian-tts-female-GPTInformal-Persian-vits'],
    ['coqui-vits-male-SmartGitiCorp','https://huggingface.co/karim23657/persian-tts-vits/tree/main/male-SmartGitiCorp-coqui-vits',"🚀 بهمن",'https://huggingface.co/SmartGitiCorp/persian_tts_vits'],
    ['vits-piper-fa-ganji','https://huggingface.co/karim23657/persian-tts-vits/tree/main/vits-piper-fa-ganji',"🚀 برنا",'https://huggingface.co/SadeghK/persian-text-to-speech'],
    ['vits-piper-fa-ganji-adabi','https://huggingface.co/karim23657/persian-tts-vits/tree/main/vits-piper-fa-ganji-adabi',"🚀 برنا-1",'https://huggingface.co/SadeghK/persian-text-to-speech'],
    ['vits-piper-fa-gyro-medium','https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-fa_IR-gyro-medium.tar.bz2',"💧 نیما",'https://huggingface.co/gyroing/Persian-Piper-Model-gyro'],
    ['piper-fa-amir-medium','https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-fa_IR-amir-medium.tar.bz2',"⚡️ آریا",'https://huggingface.co/SadeghK/persian-text-to-speech'],
    ['vits-mimic3-fa-haaniye_low','https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-mimic3-fa-haaniye_low.tar.bz2',"🌹 ریما",'https://github.com/MycroftAI/mimic3'],
    ['vits-piper-fa_en-rezahedayatfar-ibrahimwalk-medium','https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-fa_en-rezahedayatfar-ibrahimwalk-medium.tar.bz2',"🌠 پیام",'https://huggingface.co/mah92/persian-english-piper-tts-model'],
]
28
+
29
def _download_file(url, dest_path):
    """Stream *url* to *dest_path* in 8 KiB chunks with coarse progress output.

    Raises Exception when the server responds with a non-200 status, or when
    the payload turns out to be a Git-LFS pointer file rather than the real
    content (detected from the first chunk's magic line, so the whole body
    is never buffered in memory).
    """
    response = requests.get(url, stream=True)
    if response.status_code != 200:
        raise Exception(f"Failed to download model from {url}. Status code: {response.status_code}")

    total_size = int(response.headers.get('content-length', 0))
    downloaded = 0
    first_chunk = True

    print(f"Total size: {total_size / (1024*1024):.1f} MB")
    with open(dest_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            if not chunk:
                continue
            if first_chunk:
                first_chunk = False
                # Git LFS pointers start with a fixed magic line.
                head = chunk[:100].decode('utf-8', errors='ignore')
                if head.startswith('version https://git-lfs.github.com/spec/v1'):
                    raise Exception(f"Received Git LFS pointer instead of file content from {url}")
            f.write(chunk)
            downloaded += len(chunk)
            if total_size > 0:
                percent = int((downloaded / total_size) * 100)
                if percent % 10 == 0:
                    print(f" {percent}%", end="", flush=True)
    print("\nDownload complete")


def download_and_extract_model(url, destination):
    """Download and extract the model files into *destination*.

    Two layouts are supported:
      * Hugging Face URLs: ``model.onnx`` and ``tokens.txt`` are fetched
        individually (the original code always took this branch because its
        MMS check was hard-coded to True, so the ``.tar.bz2``-suffix branch
        was dead code and has been removed).
      * Any other URL: assumed to point at a ``.tar.bz2`` archive which is
        downloaded ONCE (the original fetched it twice — once fully buffered
        just to check for an LFS pointer, once to save), extracted, and the
        archive removed.

    Raises Exception on HTTP errors or Git-LFS pointer responses.
    """
    print(f"Downloading from URL: {url}")
    print(f"Destination: {destination}")

    # Ensure the target directory exists for BOTH branches (the original
    # only created it in the archive branch, after the MMS branch had
    # already tried to write into it).
    os.makedirs(destination, exist_ok=True)

    if "huggingface.co" in url:
        # /tree/main/ pages are HTML listings; /resolve/main/ serves raw files.
        base_url = url.replace("/tree/main/", "/resolve/main/")

        print("Downloading model.onnx...")
        _download_file(f"{base_url}/model.onnx", os.path.join(destination, "model.onnx"))
        print("Model download complete")

        print("Downloading tokens.txt...")
        _download_file(f"{base_url}/tokens.txt", os.path.join(destination, "tokens.txt"))
        print("Tokens download complete")
        return

    # Non-Hugging-Face models are distributed as tar.bz2 archives.
    tar_path = os.path.join(destination, "model.tar.bz2")
    print("Downloading model archive...")
    _download_file(url, tar_path)

    print(f"Extracting {tar_path} to {destination}")
    try:
        with tarfile.open(tar_path, "r:bz2") as tar:
            tar.extractall(path=destination)
        os.remove(tar_path)
        print("Extraction complete")
    except Exception as e:
        print(f"Error during extraction: {str(e)}")
        raise

    print("Contents of destination directory:")
    for root, dirs, files in os.walk(destination):
        print(f"\nDirectory: {root}")
        if dirs:
            print("  Subdirectories:", dirs)
        if files:
            print("  Files:", files)
140
+
141
def dl_espeak_data():
    """Download and extract the espeak-ng phoneme data beside this file.

    The archive comes from the sherpa-onnx release assets. Because this
    function is invoked at import time, it now short-circuits when an
    ``espeak-ng-data`` directory already exists, avoiding a re-download of
    the archive on every start. Raises Exception on HTTP errors.
    """
    destination = os.path.dirname(os.path.abspath(__file__))
    if os.path.isdir(os.path.join(destination, 'espeak-ng-data')):
        print("espeak-ng-data already present, skipping download")
        return

    tar_path = 'espeak-ng-data.tar.bz2'
    print("Downloading model archive...")
    response = requests.get('https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2', stream=True)
    # Fail fast on HTTP errors instead of writing an error page to disk
    # and failing later inside tarfile with a confusing message.
    if response.status_code != 200:
        raise Exception(f"Failed to download espeak-ng data. Status code: {response.status_code}")
    total_size = int(response.headers.get('content-length', 0))
    block_size = 8192
    downloaded = 0

    print(f"Total size: {total_size / (1024*1024):.1f} MB")
    with open(tar_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=block_size):
            if chunk:
                f.write(chunk)
                downloaded += len(chunk)
                if total_size > 0:
                    percent = int((downloaded / total_size) * 100)
                    if percent % 10 == 0:
                        print(f" {percent}%", end="", flush=True)
    print("\nDownload complete")

    # Extract the tar.bz2 file next to this module.
    print(f"Extracting {tar_path} to {destination}")
    try:
        with tarfile.open(tar_path, "r:bz2") as tar:
            tar.extractall(path=destination)
        os.remove(tar_path)
        print("Extraction complete")
    except Exception as e:
        print(f"Error during extraction: {str(e)}")
        raise

    print("Contents of destination directory:")
    for root, dirs, files in os.walk(destination):
        print(f"\nDirectory: {root}")
        if dirs:
            print("  Subdirectories:", dirs)
        if files:
            print("  Files:", files)
181
+
182
+ dl_espeak_data()
183
+
184
def find_model_files(model_dir, is_mms=True):
    """Find model files in *model_dir* and its subdirectories.

    Args:
        model_dir: Directory to search (recursively) for model assets.
        is_mms: Whether the model is an MMS model (no lexicon needed).
            Previously this was hard-coded to ``True`` inside the function,
            making the lexicon handling unreachable; it is now a parameter
            whose default preserves the original behavior.

    Returns:
        Dict with keys ``'model'`` (path to a ``.onnx`` file), ``'tokens'``
        (path to ``tokens.txt``), and — only when ``is_mms`` is False —
        ``'lexicon'``. Returns ``{}`` when no ``.onnx`` model is found.
    """
    model_files = {}

    for root, _, files in os.walk(model_dir):
        for file in files:
            file_path = os.path.join(root, file)
            if file.endswith('.onnx'):
                model_files['model'] = file_path
            elif file == 'tokens.txt':
                model_files['tokens'] = file_path
            elif file == 'lexicon.txt' and not is_mms:
                model_files['lexicon'] = file_path

    # Non-MMS VITS models expect a lexicon file; create an empty one next
    # to the model when none was found.
    if not is_mms and 'model' in model_files and 'lexicon' not in model_files:
        lexicon_path = os.path.join(os.path.dirname(model_files['model']), 'lexicon.txt')
        with open(lexicon_path, 'w', encoding='utf-8'):
            pass  # just create the empty file
        model_files['lexicon'] = lexicon_path

    return model_files if 'model' in model_files else {}
216
+
217
def generate_audio(text, model_info):
    """Generate speech audio from *text* using the voice named *model_info*.

    *model_info* is the display name (``models[i][2]``); the model is
    downloaded into ``./models/<model_info>`` on first use.

    Returns:
        ``(samples, sample_rate)`` — float32 numpy array normalized to
        [-1, 1], and the model's output sample rate.

    Raises:
        ValueError: unknown voice, missing model files, or empty output.
        (Other exceptions from sherpa_onnx propagate after logging.)
    """
    try:
        model_dir = os.path.join("./models", model_info)

        print(f"\nLooking for model in: {model_dir}")

        # Download the model on first use.
        if not os.path.exists(model_dir):
            print(f"Model directory doesn't exist, downloading {model_info}...")
            model_url = None
            for entry in models:
                if model_info == entry[2]:
                    model_url = entry[1]
                    break
            # Fail with a clear message instead of a NameError when the
            # requested voice is not in the registry (original left
            # model_url unbound in that case).
            if model_url is None:
                raise ValueError(f"Unknown model: {model_info}")
            os.makedirs(model_dir, exist_ok=True)
            download_and_extract_model(model_url, model_dir)

        print(f"Contents of {model_dir}:")
        for item in os.listdir(model_dir):
            item_path = os.path.join(model_dir, item)
            if os.path.isdir(item_path):
                print(f"  Directory: {item}")
                print(f"    Contents: {os.listdir(item_path)}")
            else:
                print(f"  File: {item}")

        # Find and validate model files.
        model_files = find_model_files(model_dir)
        if not model_files or 'model' not in model_files:
            raise ValueError(f"Could not find required model files in {model_dir}")

        print("\nFound model files:")
        print(f"Model: {model_files['model']}")
        print(f"Tokens: {model_files.get('tokens', 'Not found')}")
        print(f"Lexicon: {model_files.get('lexicon', 'Not required for MMS')}\n")

        # NOTE(review): model_dir's basename is the Persian display name
        # (e.g. "🌠 راد"), which never contains the substring 'mms' — this
        # check may therefore misclassify the MMS voice; confirm intent.
        is_mms = 'mms' in os.path.basename(model_dir).lower()

        if is_mms:
            if 'tokens' not in model_files or not os.path.exists(model_files['tokens']):
                raise ValueError("tokens.txt is required for MMS models")

            # MMS models use tokens.txt and no lexicon / espeak data.
            vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
                model_files['model'],   # model
                '',                     # lexicon
                model_files['tokens'],  # tokens
                '',                     # data_dir
                '',                     # dict_dir
                0.667,                  # noise_scale
                0.8,                    # noise_scale_w
                1.0                     # length_scale
            )
        else:
            if 'tokens' not in model_files or not os.path.exists(model_files['tokens']):
                raise ValueError("tokens.txt is required for VITS models")

            # Prefer espeak data bundled with the model; fall back to the
            # copy extracted beside this file by dl_espeak_data().
            espeak_data = os.path.join(os.path.dirname(model_files['model']), 'espeak-ng-data')
            data_dir = espeak_data if os.path.exists(espeak_data) else 'espeak-ng-data'

            # Use the lexicon only if the file actually exists on disk.
            lexicon = model_files.get('lexicon', '') if os.path.exists(model_files.get('lexicon', '')) else ''

            vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
                model_files['model'],   # model
                lexicon,                # lexicon
                model_files['tokens'],  # tokens
                data_dir,               # data_dir
                '',                     # dict_dir
                0.667,                  # noise_scale
                0.8,                    # noise_scale_w
                1.0                     # length_scale
            )

        # Wrap the VITS config into the full offline-TTS configuration.
        model_config = sherpa_onnx.OfflineTtsModelConfig()
        model_config.vits = vits_config

        config = sherpa_onnx.OfflineTtsConfig(
            model=model_config,
            max_num_sentences=2
        )

        tts = sherpa_onnx.OfflineTts(config)

        audio_data = tts.generate(text)

        if audio_data is None or len(audio_data.samples) == 0:
            raise ValueError("Failed to generate audio - no data generated")

        # Normalize to [-1, 1] for playback; reject an all-zero signal.
        audio_array = np.array(audio_data.samples, dtype=np.float32)
        if np.any(audio_array):
            audio_array = audio_array / np.abs(audio_array).max()
        else:
            raise ValueError("Generated audio is empty")

        # Gradio-friendly return: (numpy array, sample rate).
        return (audio_array, audio_data.sample_rate)

    except Exception as e:
        error_msg = str(e)
        # Surface OOV / token-conversion failures with a friendlier prefix.
        if "out of vocabulary" in error_msg.lower() or "token" in error_msg.lower():
            error_msg = f"Text contains unsupported characters: {error_msg}"
        # Log once (the original printed the same error twice) and re-raise.
        print(f"Error in TTS generation: {error_msg}")
        raise
333
+
334
def tts_interface(selected_model, text, status_output):
    """Gradio handler: synthesize *text* with the voice *selected_model*.

    Args:
        selected_model: Display name of the voice to use.
        text: Text to speak; blank input is rejected up front.
        status_output: Accepted for interface compatibility; unused.

    Returns:
        ``((sample_rate, samples), status_message)`` on success, or
        ``(None, error_message)`` on any failure — errors are reported in
        the status string rather than raised.
    """
    try:
        if not text.strip():
            return None, "Please enter some text"

        # (Removed dead locals from the original: `original_text` and an
        # in-progress `status` string were assigned but never used.)
        model_id = selected_model
        voice_name = model_id

        try:
            audio_data, sample_rate = generate_audio(text, model_id)

            final_status = f"Generated speech using {voice_name}"
            final_status += f"\nText: '{text}'"

            # Gradio audio output expects (sample_rate, samples).
            return (sample_rate, audio_data), final_status
        except ValueError as e:
            # Known errors already carry user-friendly messages.
            error_msg = str(e)
            if "cannot process some words" in error_msg.lower():
                return None, error_msg
            return None, f"Error: {error_msg}"

    except Exception as e:
        print(f"Error in TTS generation: {str(e)}")
        return None, f"Error: {str(e)}"
369
+
vits-piper-fa-ganji.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71e35c08741b63b570a40c08d411c49c6fea754e263e86ce8343fb9f19119a03
3
+ size 63516173