Irpan committed on
Commit
499b2c1
·
1 Parent(s): c492cbb
Files changed (3) hide show
  1. asr.py +6 -6
  2. tts.py +2 -2
  3. util.py +17 -6
asr.py CHANGED
@@ -13,13 +13,13 @@ import util
13
 
14
  # Load processor and model
15
  models_info = {
16
- "OpenAI-Whisper-Uzbek": {
17
  "processor": WhisperProcessor.from_pretrained("openai/whisper-small", language="uzbek", task="transcribe"),
18
  "model": AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small"),
19
  "ctc_model": False,
20
  "arabic_script": False
21
  },
22
- "Meta-MMS-Uyghur": {
23
  "processor": AutoProcessor.from_pretrained("facebook/mms-1b-all", target_lang='uig-script_arabic'),
24
  "model": AutoModelForCTC.from_pretrained("facebook/mms-1b-all", target_lang='uig-script_arabic', ignore_mismatched_sizes=True),
25
  "ctc_model": True,
@@ -63,10 +63,10 @@ def transcribe(audio_data, model_id) -> str:
63
  else:
64
  return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data)), None
65
 
66
- # Check audio duration
67
- duration = audio_input.shape[1] / sampling_rate
68
- if duration > 10:
69
- return f"<<ERROR: Audio duration ({duration:.2f}s) exceeds 10 seconds. Please upload a shorter audio clip for faster processing.>>", None
70
 
71
  model = models_info[model_id]["model"]
72
  processor = models_info[model_id]["processor"]
 
13
 
14
  # Load processor and model
15
  models_info = {
16
+ "OpenAI-Whisper": {
17
  "processor": WhisperProcessor.from_pretrained("openai/whisper-small", language="uzbek", task="transcribe"),
18
  "model": AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-small"),
19
  "ctc_model": False,
20
  "arabic_script": False
21
  },
22
+ "Meta-MMS": {
23
  "processor": AutoProcessor.from_pretrained("facebook/mms-1b-all", target_lang='uig-script_arabic'),
24
  "model": AutoModelForCTC.from_pretrained("facebook/mms-1b-all", target_lang='uig-script_arabic', ignore_mismatched_sizes=True),
25
  "ctc_model": True,
 
63
  else:
64
  return "<<ERROR: Invalid Audio Input Instance: {}>>".format(type(audio_data)), None
65
 
66
+ # # Check audio duration
67
+ # duration = audio_input.shape[1] / sampling_rate
68
+ # if duration > 10:
69
+ # return f"<<ERROR: Audio duration ({duration:.2f}s) exceeds 10 seconds. Please upload a shorter audio clip for faster processing.>>", None
70
 
71
  model = models_info[model_id]["model"]
72
  processor = models_info[model_id]["processor"]
tts.py CHANGED
@@ -49,8 +49,8 @@ text2speech.spc2wav = None ### disable griffin-lim
49
 
50
  def synthesize(text, model_id):
51
  print(text)
52
- if len(text) > 200:
53
- raise ValueError(f"Input text exceeds 200 characters. Please provide a shorter input text for faster processing.")
54
 
55
  if model_id == 'IS2AI-TurkicTTS':
56
  return synthesize_turkic_tts(text)
 
49
 
50
  def synthesize(text, model_id):
51
  print(text)
52
+ # if len(text) > 200:
53
+ # raise ValueError(f"Input text exceeds 200 characters. Please provide a shorter input text for faster processing.")
54
 
55
  if model_id == 'IS2AI-TurkicTTS':
56
  return synthesize_turkic_tts(text)
util.py CHANGED
@@ -4,14 +4,25 @@ from umsc import UgMultiScriptConverter
4
  ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
5
  ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
6
 
7
- asr_examples = [['examples/1.wav', 'ixxan/wav2vec2-large-mms-1b-uyghur-latin'],
8
- ['examples/2.wav', 'ixxan/wav2vec2-large-mms-1b-uyghur-latin']]
 
 
 
 
 
 
 
 
9
 
10
  tts_examples = [
11
- ["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Meta-MMS"],
12
  ["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "IS2AI-TurkicTTS"],
13
- ["Bu putbol musabiqisining axirlishishi", "Meta-MMS"],
14
- ["Bu putbol musabiqisining axirlishishi", "IS2AI-TurkicTTS"],
 
 
 
 
15
  ["Yaxshimusiz?", "Meta-MMS"],
16
- ["Yaxshimusiz?", "IS2AI-TurkicTTS"]
17
  ]
 
4
  ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
5
  ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
6
 
7
+ asr_examples = [
8
+ ['examples/1.wav', 'OpenAI-Whisper'],
9
+ ['examples/1.wav', 'Meta-MMS'],
10
+ ['examples/1.wav', 'Ixxan-FineTuned-Whisper'],
11
+ ['examples/1.wav', 'Ixxan-FineTuned-MMS'],
12
+ ['examples/2.wav', 'OpenAI-Whisper'],
13
+ ['examples/2.wav', 'Meta-MMS'],
14
+ ['examples/2.wav', 'Ixxan-FineTuned-Whisper'],
15
+ ['examples/2.wav', 'Ixxan-FineTuned-MMS']
16
+ ]
17
 
18
  tts_examples = [
 
19
  ["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "IS2AI-TurkicTTS"],
20
+ ["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Meta-MMS"],
21
+ ["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Ixxan-FineTuned-MMS"],
22
+ ["Bu putbol musabiqisining axirlishishi bilen, bu musabiqe pesli axirlashti.", "IS2AI-TurkicTTS"],
23
+ ["Bu putbol musabiqisining axirlishishi bilen, bu musabiqe pesli axirlashti.", "Meta-MMS"],
24
+ ["Bu putbol musabiqisining axirlishishi bilen, bu musabiqe pesli axirlashti.", "Ixxan-FineTuned-MMS"],
25
+ ["Yaxshimusiz?", "IS2AI-TurkicTTS"],
26
  ["Yaxshimusiz?", "Meta-MMS"],
27
+ ["Yaxshimusiz?", "Ixxan-FineTuned-MMS"]
28
  ]