Spaces:
Running
Running
Irpan
committed on
Commit
·
499b2c1
1
Parent(s):
c492cbb
asr
Browse files
asr.py
CHANGED
@@ -13,13 +13,13 @@ import util
|
|
13 |
|
14 |
# Load processor and model
|
15 |
models_info = {
|
16 |
-
"OpenAI-Whisper
|
17 |
"processor": WhisperProcessor.from_pretrained("openai/whisper-small", language="uzbek", task="transcribe"),
|
18 |
"model": AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small"),
|
19 |
"ctc_model": False,
|
20 |
"arabic_script": False
|
21 |
},
|
22 |
-
"Meta-MMS
|
23 |
"processor": AutoProcessor.from_pretrained("facebook/mms-1b-all", target_lang='uig-script_arabic'),
|
24 |
"model": AutoModelForCTC.from_pretrained("facebook/mms-1b-all", target_lang='uig-script_arabic', ignore_mismatched_sizes=True),
|
25 |
"ctc_model": True,
|
@@ -63,10 +63,10 @@ def transcribe(audio_data, model_id) -> str:
|
|
63 |
else:
|
64 |
return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data)), None
|
65 |
|
66 |
-
# Check audio duration
|
67 |
-
duration = audio_input.shape[1] / sampling_rate
|
68 |
-
if duration > 10:
|
69 |
-
|
70 |
|
71 |
model = models_info[model_id]["model"]
|
72 |
processor = models_info[model_id]["processor"]
|
|
|
13 |
|
14 |
# Load processor and model
|
15 |
models_info = {
|
16 |
+
"OpenAI-Whisper": {
|
17 |
"processor": WhisperProcessor.from_pretrained("openai/whisper-small", language="uzbek", task="transcribe"),
|
18 |
"model": AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small"),
|
19 |
"ctc_model": False,
|
20 |
"arabic_script": False
|
21 |
},
|
22 |
+
"Meta-MMS": {
|
23 |
"processor": AutoProcessor.from_pretrained("facebook/mms-1b-all", target_lang='uig-script_arabic'),
|
24 |
"model": AutoModelForCTC.from_pretrained("facebook/mms-1b-all", target_lang='uig-script_arabic', ignore_mismatched_sizes=True),
|
25 |
"ctc_model": True,
|
|
|
63 |
else:
|
64 |
return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data)), None
|
65 |
|
66 |
+
# # Check audio duration
|
67 |
+
# duration = audio_input.shape[1] / sampling_rate
|
68 |
+
# if duration > 10:
|
69 |
+
# return f"<<ERROR: Audio duration ({duration:.2f}s) exceeds 10 seconds. Please upload a shorter audio clip for faster processing.>>", None
|
70 |
|
71 |
model = models_info[model_id]["model"]
|
72 |
processor = models_info[model_id]["processor"]
|
tts.py
CHANGED
@@ -49,8 +49,8 @@ text2speech.spc2wav = None ### disable griffin-lim
|
|
49 |
|
50 |
def synthesize(text, model_id):
|
51 |
print(text)
|
52 |
-
if len(text) > 200:
|
53 |
-
|
54 |
|
55 |
if model_id == 'IS2AI-TurkicTTS':
|
56 |
return synthesize_turkic_tts(text)
|
|
|
49 |
|
50 |
def synthesize(text, model_id):
|
51 |
print(text)
|
52 |
+
# if len(text) > 200:
|
53 |
+
# raise ValueError(f"Input text exceeds 200 characters. Please provide a shorter input text for faster processing.")
|
54 |
|
55 |
if model_id == 'IS2AI-TurkicTTS':
|
56 |
return synthesize_turkic_tts(text)
|
util.py
CHANGED
@@ -4,14 +4,25 @@ from umsc import UgMultiScriptConverter
|
|
4 |
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
|
5 |
ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
|
6 |
|
7 |
-
asr_examples = [
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
tts_examples = [
|
11 |
-
["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Meta-MMS"],
|
12 |
["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "IS2AI-TurkicTTS"],
|
13 |
-
["
|
14 |
-
["
|
|
|
|
|
|
|
|
|
15 |
["Yaxshimusiz?", "Meta-MMS"],
|
16 |
-
["Yaxshimusiz?", "
|
17 |
]
|
|
|
4 |
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
|
5 |
ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
|
6 |
|
7 |
+
asr_examples = [
|
8 |
+
['examples/1.wav', 'OpenAI-Whisper'],
|
9 |
+
['examples/1.wav', 'Meta-MMS'],
|
10 |
+
['examples/1.wav', 'Ixxan-FineTuned-Whisper'],
|
11 |
+
['examples/1.wav', 'Ixxan-FineTuned-MMS'],
|
12 |
+
['examples/2.wav', 'OpenAI-Whisper'],
|
13 |
+
['examples/2.wav', 'Meta-MMS'],
|
14 |
+
['examples/2.wav', 'Ixxan-FineTuned-Whisper'],
|
15 |
+
['examples/2.wav', 'Ixxan-FineTuned-MMS']
|
16 |
+
]
|
17 |
|
18 |
tts_examples = [
|
|
|
19 |
["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "IS2AI-TurkicTTS"],
|
20 |
+
["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Meta-MMS"],
|
21 |
+
["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Ixxan-FineTuned-MMS"],
|
22 |
+
["Bu putbol musabiqisining axirlishishi bilen, bu musabiqe pesli axirlashti.", "IS2AI-TurkicTTS"],
|
23 |
+
["Bu putbol musabiqisining axirlishishi bilen, bu musabiqe pesli axirlashti.", "Meta-MMS"],
|
24 |
+
["Bu putbol musabiqisining axirlishishi bilen, bu musabiqe pesli axirlashti.", "Ixxan-FineTuned-MMS"],
|
25 |
+
["Yaxshimusiz?", "IS2AI-TurkicTTS"],
|
26 |
["Yaxshimusiz?", "Meta-MMS"],
|
27 |
+
["Yaxshimusiz?", "Ixxan-FineTuned-MMS"]
|
28 |
]
|