ai-tube-model-parler-tts-mini

Paused

App Files Files Community

jbilcke-hf HF Staff committed on Apr 22, 2024

Commit

5847fd8

verified ·

1 Parent(s): ac2a235

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -9

app.py CHANGED Viewed

@@ -17,7 +17,11 @@ feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
 SAMPLE_RATE = feature_extractor.sampling_rate
 SEED = 42
-def gen_tts(text, description):
     inputs = tokenizer(description, return_tensors="pt").to(device)
     prompt = tokenizer(text, return_tensors="pt").to(device)
@@ -27,10 +31,19 @@ def gen_tts(text, description):
     )
     audio_arr = generation.cpu().numpy().squeeze()
-    return SAMPLE_RATE, audio_arr
-with gr.Blocks(css=css) as block:
    gr.HTML("""
         <div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
         <div style="text-align: center; color: black;">
@@ -38,13 +51,13 @@ with gr.Blocks(css=css) as block:
         <p style="color: black;">It is not available for public use, but you can use the <a href="https://huggingface.co/spaces/ByteDance/AnimateDiff-Lightning" target="_blank">original space</a>.</p>
         </div>
         </div>""")
-    with gr.Row():
-        input_text = gr.Textbox(label="Input Text")
-        description = gr.Textbox(label="Description")
-        run_button = gr.Button("Generate Audio")
-        audio_out = gr.Textbox()
-    inputs = [input_text, description]
     outputs = [audio_out]
     run_button.click(fn=gen_tts, inputs=inputs, outputs=outputs, queue=True)

 SAMPLE_RATE = feature_extractor.sampling_rate
 SEED = 42
+def gen_tts(secret_token, text, description):
+    if secret_token != SECRET_TOKEN:
+        raise gr.Error(
+            f'Invalid secret token. Please fork the original space if you want to use it for yourself.')
     inputs = tokenizer(description, return_tensors="pt").to(device)
     prompt = tokenizer(text, return_tensors="pt").to(device)
     )
     audio_arr = generation.cpu().numpy().squeeze()
+    # Write the numpy array as a WAV file
+    buffer = BytesIO()
+    write(buffer, SAMPLE_RATE, audio_arr.astype(np.int16))
+    buffer.seek(0)
+    # Encode the WAV file in base64
+    audio_base64 = base64.b64encode(buffer.read()).decode('utf-8')
+    data_uri = 'data:audio/wav;base64,' + audio_base64
+    return data_uri
+with gr.Blocks() as block:
    gr.HTML("""
         <div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
         <div style="text-align: center; color: black;">
         <p style="color: black;">It is not available for public use, but you can use the <a href="https://huggingface.co/spaces/ByteDance/AnimateDiff-Lightning" target="_blank">original space</a>.</p>
         </div>
         </div>""")
+    secret_token = gr.Textbox(label="Secret token")
+    input_text = gr.Textbox(label="Input Text")
+    description = gr.Textbox(label="Description")
+    run_button = gr.Button("Generate Audio")
+    audio_out = gr.Textbox()
+    inputs = [secret_token, input_text, description]
     outputs = [audio_out]
     run_button.click(fn=gen_tts, inputs=inputs, outputs=outputs, queue=True)