Update README.md
Browse files
README.md
CHANGED
@@ -141,7 +141,7 @@ from transformers import pipeline # for working with Whisper-Podlodka-Turbo
|
|
141 |
import wget # for downloading demo sound from its URL
|
142 |
from whisper_lid.whisper_lid import detect_language_in_speech # for spoken language detection
|
143 |
|
144 |
-
model_id = "
|
145 |
target_sampling_rate = 16_000 # Hz
|
146 |
|
147 |
asr = pipeline(model=model_id, device_map='auto', torch_dtype='auto')
|
@@ -149,7 +149,7 @@ asr = pipeline(model=model_id, device_map='auto', torch_dtype='auto')
|
|
149 |
# An example of speech recognition in Russian, spoken by a native speaker of this language
|
150 |
sound_ru_url = 'https://huggingface.co/bond005/whisper-podlodka-turbo/resolve/main/test_sound_ru.wav'
|
151 |
sound_ru_name = wget.download(sound_ru_url)
|
152 |
-
sound_ru = librosa.load(sound_ru_name, sr=
|
153 |
print('Duration of sound with Russian speech = {0:.3f} seconds.'.format(
|
154 |
sound_ru.shape[0] / target_sampling_rate
|
155 |
))
|
@@ -173,7 +173,7 @@ print(recognition_result['text'] + '\n')
|
|
173 |
# An example of speech recognition in English, pronounced by a non-native speaker of that language with an accent
|
174 |
sound_en_url = 'https://huggingface.co/bond005/whisper-podlodka-turbo/resolve/main/test_sound_en.wav'
|
175 |
sound_en_name = wget.download(sound_en_url)
|
176 |
-
sound_en = librosa.load(sound_en_name, sr=
|
177 |
print('Duration of sound with English speech = {0:.3f} seconds.'.format(
|
178 |
sound_en.shape[0] / target_sampling_rate
|
179 |
))
|
@@ -263,7 +263,7 @@ Along with special language tokens, the model can also return the special token
|
|
263 |
```python
|
264 |
nonspeech_sound_url = 'https://huggingface.co/bond005/whisper-podlodka-turbo/resolve/main/test_sound_nonspeech.wav'
|
265 |
nonspeech_sound_name = wget.download(nonspeech_sound_url)
|
266 |
-
nonspeech_sound = librosa.load(nonspeech_sound_name, sr=
|
267 |
print('Duration of sound without speech = {0:.3f} seconds.'.format(
|
268 |
nonspeech_sound.shape[0] / target_sampling_rate
|
269 |
))
|
@@ -337,7 +337,7 @@ The model was fine-tuned on a composite dataset including:
|
|
337 |
- [Common Voice](https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0) (Ru, En)
|
338 |
- [Podlodka Speech](https://huggingface.co/datasets/bond005/podlodka_speech) (Ru)
|
339 |
- [Taiga Speech](https://huggingface.co/datasets/bond005/taiga_speech_v2) (Ru, synthetic)
|
340 |
-
- [Golos Farfield](https://huggingface.co/datasets/bond005/sberdevices_golos_100h_farfield) and [Golos
|
341 |
- [Sova Rudevices](https://huggingface.co/datasets/bond005/sova_rudevices) (Ru)
|
342 |
- [Audioset](https://huggingface.co/datasets/bond005/audioset-nonspeech) (non-speech audio)
|
343 |
|
@@ -375,7 +375,7 @@ The quality of the Russian speech recognition task was tested on test sub-sets o
|
|
375 |
- [Common Voice 11 Ru](https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0)
|
376 |
- [Podlodka Speech](https://huggingface.co/datasets/bond005/podlodka_speech)
|
377 |
- [Golos Farfield](https://huggingface.co/datasets/bond005/sberdevices_golos_100h_farfield)
|
378 |
-
- [Golos
|
379 |
- [Sova Rudevices](https://huggingface.co/datasets/bond005/sova_rudevices)
|
380 |
- [Russian Librispeech](https://huggingface.co/datasets/bond005/rulibrispeech)
|
381 |
|
|
|
141 |
import wget # for downloading demo sound from its URL
|
142 |
from whisper_lid.whisper_lid import detect_language_in_speech # for spoken language detection
|
143 |
|
144 |
+
model_id = "bond005/whisper-podlodka-turbo" # the best Whisper model :-)
|
145 |
target_sampling_rate = 16_000 # Hz
|
146 |
|
147 |
asr = pipeline(model=model_id, device_map='auto', torch_dtype='auto')
|
|
|
149 |
# An example of speech recognition in Russian, spoken by a native speaker of this language
|
150 |
sound_ru_url = 'https://huggingface.co/bond005/whisper-podlodka-turbo/resolve/main/test_sound_ru.wav'
|
151 |
sound_ru_name = wget.download(sound_ru_url)
|
152 |
+
sound_ru = librosa.load(sound_ru_name, sr=target_sampling_rate, mono=True)[0]
|
153 |
print('Duration of sound with Russian speech = {0:.3f} seconds.'.format(
|
154 |
sound_ru.shape[0] / target_sampling_rate
|
155 |
))
|
|
|
173 |
# An example of speech recognition in English, pronounced by a non-native speaker of that language with an accent
|
174 |
sound_en_url = 'https://huggingface.co/bond005/whisper-podlodka-turbo/resolve/main/test_sound_en.wav'
|
175 |
sound_en_name = wget.download(sound_en_url)
|
176 |
+
sound_en = librosa.load(sound_en_name, sr=target_sampling_rate, mono=True)[0]
|
177 |
print('Duration of sound with English speech = {0:.3f} seconds.'.format(
|
178 |
sound_en.shape[0] / target_sampling_rate
|
179 |
))
|
|
|
263 |
```python
|
264 |
nonspeech_sound_url = 'https://huggingface.co/bond005/whisper-podlodka-turbo/resolve/main/test_sound_nonspeech.wav'
|
265 |
nonspeech_sound_name = wget.download(nonspeech_sound_url)
|
266 |
+
nonspeech_sound = librosa.load(nonspeech_sound_name, sr=target_sampling_rate, mono=True)[0]
|
267 |
print('Duration of sound without speech = {0:.3f} seconds.'.format(
|
268 |
nonspeech_sound.shape[0] / target_sampling_rate
|
269 |
))
|
|
|
337 |
- [Common Voice](https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0) (Ru, En)
|
338 |
- [Podlodka Speech](https://huggingface.co/datasets/bond005/podlodka_speech) (Ru)
|
339 |
- [Taiga Speech](https://huggingface.co/datasets/bond005/taiga_speech_v2) (Ru, synthetic)
|
340 |
+
- [Golos Farfield](https://huggingface.co/datasets/bond005/sberdevices_golos_100h_farfield) and [Golos Crowd](https://huggingface.co/datasets/bond005/sberdevices_golos_10h_crowd) (Ru)
|
341 |
- [Sova Rudevices](https://huggingface.co/datasets/bond005/sova_rudevices) (Ru)
|
342 |
- [Audioset](https://huggingface.co/datasets/bond005/audioset-nonspeech) (non-speech audio)
|
343 |
|
|
|
375 |
- [Common Voice 11 Ru](https://huggingface.co/datasets/mozilla-foundation/common_voice_11_0)
|
376 |
- [Podlodka Speech](https://huggingface.co/datasets/bond005/podlodka_speech)
|
377 |
- [Golos Farfield](https://huggingface.co/datasets/bond005/sberdevices_golos_100h_farfield)
|
378 |
+
- [Golos Crowd](https://huggingface.co/datasets/bond005/sberdevices_golos_10h_crowd)
|
379 |
- [Sova Rudevices](https://huggingface.co/datasets/bond005/sova_rudevices)
|
380 |
- [Russian Librispeech](https://huggingface.co/datasets/bond005/rulibrispeech)
|
381 |
|