# Commit caf2559 — "Add text-to-speech (TTS) sample" (author: LiKenun)
# NOTE(review): the Hugging Face file-viewer chrome ("raw / history / blame /
# 4.19 kB") that was pasted above the imports was not Python source and broke
# parsing; it has been converted into this comment.
from dotenv import load_dotenv
from functools import partial
import gradio as gr
from huggingface_hub import InferenceClient
from image_classification import image_classification
from image_to_text import image_to_text
from text_to_image import text_to_image
from text_to_speech import text_to_speech
from utils import request_image
class App:
    """Gradio UI exposing a gallery of AI building-block demos.

    Each tab wires simple Gradio widgets to one of the imported
    building-block handlers; the inference-backed tabs (text-to-image,
    image classification) share a single Hugging Face ``InferenceClient``.
    """

    def __init__(self, client: InferenceClient):
        # Shared client, bound via functools.partial into the handlers
        # that call the Hugging Face Inference API.
        self.client = client

    def _build_text_to_image_tab(self) -> None:
        """Tab contents: generate an image from a text prompt."""
        gr.Markdown("Generate an image from a text prompt.")
        prompt = gr.Textbox(label="Prompt")
        generate_button = gr.Button("Generate")
        output = gr.Image(label="Image", type="pil")
        generate_button.click(
            fn=partial(text_to_image, self.client),
            inputs=prompt,
            outputs=output,
        )

    def _build_image_to_text_tab(self) -> None:
        """Tab contents: fetch an image by URL, then caption it."""
        gr.Markdown("Generate a text description of an image.")
        url_input = gr.Textbox(label="Image URL")
        request_button = gr.Button("Get Image")
        image_input = gr.Image(label="Image", type="pil")
        # First click target: resolve the URL into a PIL image in-place.
        request_button.click(
            fn=request_image,
            inputs=url_input,
            outputs=image_input,
        )
        output = gr.List(label="Captions", headers=["Caption"])
        caption_button = gr.Button("Caption")
        caption_button.click(
            fn=image_to_text,
            inputs=image_input,
            outputs=output,
        )

    def _build_image_classification_tab(self) -> None:
        """Tab contents: classify a recyclable item with Trash-Net."""
        gr.Markdown("Classify a recyclable item as one of: cardboard, glass, metal, paper, plastic, or other using [Trash-Net](https://huggingface.co/prithivMLmods/Trash-Net).")
        url_input = gr.Textbox(label="Image URL")
        request_button = gr.Button("Get Image")
        image_input = gr.Image(label="Image", type="pil")
        request_button.click(
            fn=request_image,
            inputs=url_input,
            outputs=image_input,
        )
        # Creation order preserved from the original layout: button above table.
        classify_button = gr.Button("Classify")
        output = gr.Dataframe(label="Classification", headers=["Label", "Probability"], interactive=False)
        classify_button.click(
            fn=partial(image_classification, self.client),
            inputs=image_input,
            outputs=output,
        )

    def _build_text_to_speech_tab(self) -> None:
        """Tab contents: synthesize speech from text."""
        gr.Markdown("Generate speech from a text.")
        text_input = gr.Textbox(label="Text")
        generate_button = gr.Button("Generate")
        output = gr.Audio(label="Speech")
        generate_button.click(
            fn=text_to_speech,
            inputs=text_input,
            outputs=output,
        )

    def run(self) -> None:
        """Build the tabbed demo and launch the Gradio server (blocks until shutdown)."""
        with gr.Blocks(title="AI Building Blocks") as demo:
            gr.Markdown("# AI Building Blocks")
            gr.Markdown("A gallery of building blocks for building AI applications")
            with gr.Tabs():
                with gr.Tab("Text-to-image Generation"):
                    self._build_text_to_image_tab()
                with gr.Tab("Image-to-text or Image Captioning"):
                    self._build_image_to_text_tab()
                with gr.Tab("Image Classification"):
                    self._build_image_classification_tab()
                with gr.Tab("Text-to-speech (TTS)"):
                    self._build_text_to_speech_tab()
        demo.launch()
if __name__ == "__main__":
    # Pull HF credentials/config from .env before the client is constructed.
    load_dotenv()
    App(InferenceClient()).run()