# Commit caf2559 — "Add text-to-speech (TTS) sample" (author: LiKenun)
# NOTE(review): the Hugging Face file-viewer chrome ("raw / history / blame /
# 4.19 kB") that was pasted above the imports was not Python source and broke
# parsing; it has been converted into this comment.
from dotenv import load_dotenv
from functools import partial
import gradio as gr
from huggingface_hub import InferenceClient
from image_classification import image_classification
from image_to_text import image_to_text
from text_to_image import text_to_image
from text_to_speech import text_to_speech
from utils import request_image
class App:
    """Gradio UI exposing a gallery of AI building-block demos.

    Each tab wires simple Gradio widgets to one of the imported
    building-block handlers; the inference-backed tabs (text-to-image,
    image classification) share a single Hugging Face ``InferenceClient``.
    """

    def __init__(self, client: InferenceClient):
        # Shared client, bound via functools.partial into the handlers
        # that call the Hugging Face Inference API.
        self.client = client

    def _build_text_to_image_tab(self) -> None:
        """Tab contents: generate an image from a text prompt."""
        gr.Markdown("Generate an image from a text prompt.")
        prompt = gr.Textbox(label="Prompt")
        generate_button = gr.Button("Generate")
        output = gr.Image(label="Image", type="pil")
        generate_button.click(
            fn=partial(text_to_image, self.client),
            inputs=prompt,
            outputs=output,
        )

    def _build_image_to_text_tab(self) -> None:
        """Tab contents: fetch an image by URL, then caption it."""
        gr.Markdown("Generate a text description of an image.")
        url_input = gr.Textbox(label="Image URL")
        request_button = gr.Button("Get Image")
        image_input = gr.Image(label="Image", type="pil")
        # First click target: resolve the URL into a PIL image in-place.
        request_button.click(
            fn=request_image,
            inputs=url_input,
            outputs=image_input,
        )
        output = gr.List(label="Captions", headers=["Caption"])
        caption_button = gr.Button("Caption")
        caption_button.click(
            fn=image_to_text,
            inputs=image_input,
            outputs=output,
        )

    def _build_image_classification_tab(self) -> None:
        """Tab contents: classify a recyclable item with Trash-Net."""
        gr.Markdown("Classify a recyclable item as one of: cardboard, glass, metal, paper, plastic, or other using [Trash-Net](https://huggingface.co/prithivMLmods/Trash-Net).")
        url_input = gr.Textbox(label="Image URL")
        request_button = gr.Button("Get Image")
        image_input = gr.Image(label="Image", type="pil")
        request_button.click(
            fn=request_image,
            inputs=url_input,
            outputs=image_input,
        )
        # Creation order preserved from the original layout: button above table.
        classify_button = gr.Button("Classify")
        output = gr.Dataframe(label="Classification", headers=["Label", "Probability"], interactive=False)
        classify_button.click(
            fn=partial(image_classification, self.client),
            inputs=image_input,
            outputs=output,
        )

    def _build_text_to_speech_tab(self) -> None:
        """Tab contents: synthesize speech from text."""
        gr.Markdown("Generate speech from a text.")
        text_input = gr.Textbox(label="Text")
        generate_button = gr.Button("Generate")
        output = gr.Audio(label="Speech")
        generate_button.click(
            fn=text_to_speech,
            inputs=text_input,
            outputs=output,
        )

    def run(self) -> None:
        """Build the tabbed demo and launch the Gradio server (blocks until shutdown)."""
        with gr.Blocks(title="AI Building Blocks") as demo:
            gr.Markdown("# AI Building Blocks")
            gr.Markdown("A gallery of building blocks for building AI applications")
            with gr.Tabs():
                with gr.Tab("Text-to-image Generation"):
                    self._build_text_to_image_tab()
                with gr.Tab("Image-to-text or Image Captioning"):
                    self._build_image_to_text_tab()
                with gr.Tab("Image Classification"):
                    self._build_image_classification_tab()
                with gr.Tab("Text-to-speech (TTS)"):
                    self._build_text_to_speech_tab()
        demo.launch()
if __name__ == "__main__":
    # Pull HF credentials/config from .env before the client is constructed.
    load_dotenv()
    App(InferenceClient()).run()