# This Gradio app generates audio from text using a simple text-to-speech model
# with configurable generation parameters.
import gradio as gr
import numpy as np

# Sample rate of the generated audio, in samples per second.
SAMPLE_RATE = 44100
# Frequency of the placeholder tone in Hz (A4 note).
TONE_FREQUENCY = 440


# Define a function that takes text and generates audio with configurable parameters.
def text_to_audio(
    text: str,
    use_sampling: bool = True,
    top_k: int = 250,
    top_p: float = 0.0,
    temperature: float = 1.0,
    duration: float = 10.0,
    cfg_coef: float = 3.0,
) -> tuple:
    """Generate placeholder audio for *text*.

    For demonstration purposes this produces a simple sine wave; in a real
    application a text-to-speech model would be called here.

    Args:
        text: Input text. Currently unused by the placeholder generator.
        use_sampling: Generation parameter; currently unused.
        top_k: Generation parameter; currently unused.
        top_p: Generation parameter; currently unused.
        temperature: Generation parameter; currently unused.
        duration: Length of the generated audio in seconds. Must be positive.
        cfg_coef: Generation parameter; currently unused.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is a 1-D
        ``float32`` array holding a 440 Hz sine wave with amplitude 0.5.

    Raises:
        ValueError: If ``duration`` is not positive.
    """
    if duration <= 0:
        raise ValueError(f"duration must be positive, got {duration!r}")

    # The sample count must be an integer: sliders deliver floats, and NumPy
    # rejects a float element count with a TypeError.
    num_samples = int(round(SAMPLE_RATE * duration))

    # Time stamp of sample k is k / SAMPLE_RATE seconds.
    t = np.arange(num_samples, dtype=np.float64) / SAMPLE_RATE
    audio = (0.5 * np.sin(2 * np.pi * TONE_FREQUENCY * t)).astype(np.float32)
    return (SAMPLE_RATE, audio)


# Create a Gradio interface that takes a textbox input, runs it through the
# text_to_audio function, and returns output to an audio component.
with gr.Blocks() as demo:
    with gr.Row():
        text_input = gr.Textbox(label="Input Text")
        use_sampling_checkbox = gr.Checkbox(label="Use Sampling", value=True)
    with gr.Row():
        top_k_slider = gr.Slider(1, 1000, value=250, label="Top K")
        top_p_slider = gr.Slider(0.0, 1.0, value=0.0, label="Top P")
    with gr.Row():
        temperature_slider = gr.Slider(0.1, 10.0, value=1.0, label="Temperature")
        duration_slider = gr.Slider(1.0, 60.0, value=10.0, label="Duration (s)")
        cfg_coef_slider = gr.Slider(0.1, 10.0, value=3.0, label="CFG Coefficient")
    audio_output = gr.Audio(label="Generated Audio", type="numpy")

    # Regenerate the audio whenever the input text changes; the current values
    # of the other controls are passed along as generation parameters.
    text_input.change(
        fn=text_to_audio,
        inputs=[
            text_input,
            use_sampling_checkbox,
            top_k_slider,
            top_p_slider,
            temperature_slider,
            duration_slider,
            cfg_coef_slider,
        ],
        outputs=audio_output,
    )

# Launch the interface only when run as a script, so importing this module
# (e.g. to reuse text_to_audio or `demo`) does not start a server.
if __name__ == "__main__":
    demo.launch(show_error=True)