Spaces:
Runtime error
Runtime error
update
Browse files
app.py
CHANGED
@@ -7,12 +7,23 @@ import requests
|
|
7 |
import tempfile
|
8 |
from neon_tts_plugin_coqui import CoquiTTS
|
9 |
from datasets import load_dataset
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
-
dataset = load_dataset("ysharma/short_jokes")
|
13 |
|
14 |
# Languages common to both multilingual models: English, Chinese, Spanish, French, etc.
|
15 |
-
#
|
16 |
model = whisper.load_model("base")
|
17 |
#model_med = whisper.load_model("medium")
|
18 |
# Languages covered in Whisper - (exhaustive list) :
|
@@ -48,11 +59,25 @@ print(f"Languages for Coqui are: {LANGUAGES}")
|
|
48 |
# nl - dutch, fi - finnish, sl - slovenian, lv - latvian, ga - irish
|
49 |
|
50 |
|
|
|
51 |
# Driver function
|
52 |
def driver_fun(audio) :
|
53 |
translation, lang = whisper_stt(audio) # older : transcribe, translation, lang
|
54 |
#text1 = model.transcribe(audio)["text"]
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
#if translation
|
57 |
#For now only taking in English text for Bloom prompting as inference model is not high spec
|
58 |
#text_generated = lang_model_response(transcribe, lang)
|
@@ -61,8 +86,8 @@ def driver_fun(audio) :
|
|
61 |
#if lang in ['es', 'fr']:
|
62 |
# speech = tts(transcribe, lang)
|
63 |
#else:
|
64 |
-
speech = tts(
|
65 |
-
return translation, speech #transcribe,
|
66 |
|
67 |
|
68 |
# Whisper - speech-to-text
|
@@ -117,9 +142,9 @@ with demo:
|
|
117 |
#out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
|
118 |
with gr.Column():
|
119 |
out_audio = gr.Audio(label='Audio response form CoquiTTS')
|
120 |
-
|
121 |
#out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
|
122 |
|
123 |
-
b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_audio]) #out_translation_en, out_generated_text,out_generated_text_en,
|
124 |
|
125 |
demo.launch(enable_queue=True, debug=True)
|
|
|
7 |
import tempfile
|
8 |
from neon_tts_plugin_coqui import CoquiTTS
|
9 |
from datasets import load_dataset
|
10 |
+
import random
|
11 |
+
|
12 |
+
dataset = load_dataset("ysharma/short_jokes", split="train")
|
13 |
+
|
14 |
+
# Model 2: Sentence Transformer
|
15 |
+
API_URL = "https://api-inference.huggingface.co/models/sentence-transformers/msmarco-distilbert-base-tas-b"
|
16 |
+
HF_TOKEN = os.environ["HF_TOKEN"]
|
17 |
+
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
|
18 |
+
|
19 |
+
def query(payload):
    """Send ``payload`` to the hosted sentence-similarity model and return its reply.

    The payload is POSTed as JSON to ``API_URL`` using the module-level
    ``headers`` (bearer-token authorization).

    Args:
        payload: JSON-serializable request body for the inference endpoint.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.Timeout: if no response arrives within the timeout.
    """
    # Without a timeout, requests waits forever on a stalled connection,
    # which would freeze the UI callback that triggered this call.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    # Fail loudly on error statuses instead of handing an error dict to
    # callers that expect a list of scores.
    response.raise_for_status()
    return response.json()
|
22 |
+
|
23 |
|
|
|
24 |
|
25 |
# Languages common to both multilingual models: English, Chinese, Spanish, French, etc.
|
26 |
+
# Model 1: Whisper: Speech-to-text
|
27 |
model = whisper.load_model("base")
|
28 |
#model_med = whisper.load_model("medium")
|
29 |
# Languages covered in Whisper - (exhaustive list) :
|
|
|
59 |
# nl - dutch, fi - finnish, sl - slovenian, lv - latvian, ga - irish
|
60 |
|
61 |
|
62 |
+
|
63 |
# Driver function
|
64 |
def driver_fun(audio) :
|
65 |
translation, lang = whisper_stt(audio) # older : transcribe, translation, lang
|
66 |
#text1 = model.transcribe(audio)["text"]
|
67 |
|
68 |
+
# Pick a random window of up to 5000 consecutive jokes. The window is sized
# from the dataset itself rather than a hard-coded row count, so it stays in
# range if the dataset grows or shrinks.
window_size = 5000
total_jokes = len(dataset)
lower_limit = random.randrange(0, max(1, total_jokes - window_size + 1))
upper_limit = min(total_jokes, lower_limit + window_size)
print(f"lower_limit : upper_limit = {lower_limit} : {upper_limit}")
# Slice the rows first, then take the column: this reads only the selected
# rows instead of materializing the whole 'Joke' column on every call.
dataset_subset = dataset[lower_limit:upper_limit]['Joke']
# NOTE(review): the source sentence is a fixed placeholder; presumably the
# transcribed `translation` should be used here instead - confirm intent.
data = query({"inputs": {"source_sentence": "That is a happy person", "sentences": dataset_subset}})
# The scoring below needs a non-empty list with one similarity score per
# candidate sentence; anything else (e.g. an error object) is rejected here.
if not isinstance(data, list) or not data:
    raise RuntimeError(f"Unexpected response from similarity API: {data!r}")
max_match_score = max(data)
indx_score = data.index(max_match_score)
# The best score's position indexes into the candidate jokes; subscripting
# the score itself (a float) raised TypeError.
joke = dataset_subset[indx_score]
|
81 |
#if translation
|
82 |
#For now only taking in English text for Bloom prompting as inference model is not high spec
|
83 |
#text_generated = lang_model_response(transcribe, lang)
|
|
|
86 |
#if lang in ['es', 'fr']:
|
87 |
# speech = tts(transcribe, lang)
|
88 |
#else:
|
89 |
+
speech = tts(joke, 'en') #'en' # translation
|
90 |
+
return translation, joke, speech #transcribe,
|
91 |
|
92 |
|
93 |
# Whisper - speech-to-text
|
|
|
142 |
#out_translation_en = gr.Textbox(label= 'English Translation of audio using OpenAI Whisper')
|
143 |
with gr.Column():
|
144 |
out_audio = gr.Audio(label='Audio response form CoquiTTS')
|
145 |
+
out_generated_joke = gr.Textbox(label= 'Joke returned! ')
|
146 |
#out_generated_text_en = gr.Textbox(label= 'AI response to your query in English using Bloom! ')
|
147 |
|
148 |
+
b1.click(driver_fun,inputs=[in_audio], outputs=[out_transcript, out_generated_joke, out_audio]) #out_translation_en, out_generated_text,out_generated_text_en,
|
149 |
|
150 |
demo.launch(enable_queue=True, debug=True)
|