Spaces:

SeyedAli
/

Persian-Speech-Transcription

Running

App Files Files Community

SeyedAli committed on Sep 21, 2023

Commit

2c5f2c1

1 Parent(s): 277ff1a

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -42

app.py CHANGED Viewed

@@ -13,43 +13,6 @@ text_output = gr.TextArea(label="متن فارسی", type="text")
 processor = AutoProcessor.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
 model = AutoModelForCTC.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
-chars_to_ignore = [
-    ",", "?", ".", "!", "-", ";", ":", '""', "%", "'", '"', "�",
-    "#", "!", "?", "«", "»", "(", ")", "؛", ",", "?", ".", "!", "-", ";", ":", '"',
-    "“", "%", "‘", "�", "–", "…", "_", "”", '“', '„'
-]
-chars_to_mapping = {
-"\u200c": " ", "\u200d": " ", "\u200e": " ", "\u200f": " ", "\ufeff": " ",
-}
-def multiple_replace(text, chars_to_mapping):
-    pattern = "|".join(map(re.escape, chars_to_mapping.keys()))
-    return re.sub(pattern, lambda m: chars_to_mapping[m.group()], str(text))
-def remove_special_characters(text, chars_to_ignore_regex):
-    text = re.sub(chars_to_ignore_regex, '', text).lower() + " "
-    return text
-def normalizer(batch, chars_to_ignore, chars_to_mapping):
-    chars_to_ignore_regex = f"""[{"".join(chars_to_ignore)}]"""
-    text = batch[0].lower().strip()
-    text = text.replace("\u0307", " ").strip()
-    text = multiple_replace(text, chars_to_mapping)
-    text = remove_special_characters(text, chars_to_ignore_regex)
-    batch = text
-    return batch
-def speech_file_to_array_fn(batch):
-    speech_array, sampling_rate = torchaudio.load(batch["path"])
-    speech_array = speech_array.squeeze().numpy()
-    speech_array = librosa.resample(np.asarray(speech_array), sampling_rate, 16_000)
-    batch["speech"] = speech_array
-    return batch
 def ASR(audio):
    pipe = pipeline("automatic-speech-recognition", model="SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
@@ -70,12 +33,7 @@ def ASR(audio):
         with torch.no_grad():
             logits = model(input_values,attention_mask).logits
         # Decode the transcription
-        #result = normalizer(processor.batch_decode(torch.argmax(logits[0], dim=-1)),chars_to_ignore,chars_to_mapping)
         result = processor.decode(torch.argmax(logits[0], dim=-1))
-        # max_items = np.random.randint(0, len(result), 10).tolist()
-        # for i in max_items:
-        #     transcription=result[i]
-        #     return transcription
         return result
 iface = gr.Interface(fn=ASR, inputs=audio_input, outputs=text_output)
 iface.launch(share=False)

 processor = AutoProcessor.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
 model = AutoModelForCTC.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
 def ASR(audio):
    pipe = pipeline("automatic-speech-recognition", model="SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
         with torch.no_grad():
             logits = model(input_values,attention_mask).logits
         # Decode the transcription
         result = processor.decode(torch.argmax(logits[0], dim=-1))
         return result
 iface = gr.Interface(fn=ASR, inputs=audio_input, outputs=text_output)
 iface.launch(share=False)