ai-tube-model-parler-tts-mini

Paused

App Files Files Community

ai-tube-model-parler-tts-mini / app.py

jbilcke-hf HF Staff

Update app.py

f75013b verified about 1 year ago

raw

history blame

2.5 kB

	import base64
	import os
	import secrets
	from io import BytesIO

	import gradio as gr
	import numpy as np
	import torch
	from parler_tts import ParlerTTSForConditionalGeneration
	from scipy.io.wavfile import write
	from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed

	# Run on the first CUDA device when torch can see one, otherwise on the CPU.
	device = "cuda:0" if torch.cuda.is_available() else "cpu"

	# Hub repository providing the model weights, tokenizer and feature extractor.
	repo_id = "parler-tts/parler_tts_mini_v0.1"

	# Loaded once at import time and shared by every request.
	model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
	tokenizer = AutoTokenizer.from_pretrained(repo_id)
	feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)

	# Sample rate passed to the WAV writer in gen_tts.
	SAMPLE_RATE = feature_extractor.sampling_rate
	# Seed that gen_tts re-applies before each generation.
	SEED = 42

	# Shared secret that gen_tts compares against the caller-supplied token.
	# It was previously never defined, so every request failed with NameError.
	# When the SECRET_TOKEN environment variable is unset or empty, fall back to
	# an unguessable random value so an unconfigured deployment rejects all
	# requests instead of accepting a well-known default.
	SECRET_TOKEN = os.environ.get("SECRET_TOKEN") or secrets.token_urlsafe(32)

	def _wav_data_uri(samples, sample_rate):
	    """Encode a floating-point waveform as a base64 WAV data URI (16-bit PCM)."""
	    # Clip first so out-of-range samples saturate rather than wrap around
	    # when cast, then scale the [-1, 1] range onto the int16 range.
	    pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype(np.int16)

	    buffer = BytesIO()
	    write(buffer, sample_rate, pcm)

	    encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
	    return 'data:audio/wav;base64,' + encoded


	def gen_tts(secret_token, text, description):
	    """Synthesize speech for ``text`` and return it as a WAV data URI.

	    Args:
	        secret_token: Token supplied by the caller; must equal SECRET_TOKEN.
	        text: The words to be spoken (tokenized as the generation prompt).
	        description: Free-text description of the voice, tokenized as the
	            model's conditioning input.

	    Returns:
	        A ``data:audio/wav;base64,...`` string containing 16-bit PCM audio
	        at SAMPLE_RATE.

	    Raises:
	        gr.Error: If ``secret_token`` does not match SECRET_TOKEN.
	    """
	    if secret_token != SECRET_TOKEN:
	        raise gr.Error(
	            'Invalid secret token. Please fork the original space if you want to use it for yourself.')

	    inputs = tokenizer(description, return_tensors="pt").to(device)
	    prompt = tokenizer(text, return_tensors="pt").to(device)

	    # Re-seed before every call so sampling yields the same audio for the
	    # same (text, description) pair.
	    set_seed(SEED)
	    generation = model.generate(
	        input_ids=inputs.input_ids,
	        prompt_input_ids=prompt.input_ids,
	        do_sample=True,
	        temperature=1.0,
	    )
	    audio_arr = generation.cpu().numpy().squeeze()

	    # The generated waveform is floating point (the Parler-TTS usage example
	    # writes it directly with soundfile, i.e. as normalised audio). Casting
	    # it straight to int16 truncated almost every sample to 0 and produced
	    # silence, so the conversion now scales to the PCM range first.
	    return _wav_data_uri(audio_arr, SAMPLE_RATE)


	# Headless UI: the components exist only so the click handler is exposed as
	# an endpoint; a full-page overlay hides them from human visitors.
	with gr.Blocks() as block:
	    # The "original space" link previously pointed at an unrelated video
	    # model space (copy-paste leftover); it now targets the Parler-TTS demo.
	    gr.HTML("""
	<div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
	<div style="text-align: center; color: black;">
	<p style="color: black;">This space is a headless component of the cloud rendering engine used by AiTube.</p>
	<p style="color: black;">It is not available for public use, but you can use the <a href="https://huggingface.co/spaces/parler-tts/parler_tts_mini" target="_blank">original space</a>.</p>
	</div>
	</div>""")
	    secret_token = gr.Textbox(label="Secret token")
	    input_text = gr.Textbox(label="Input Text")
	    description = gr.Textbox(label="Description")
	    run_button = gr.Button("Generate Audio")
	    # A Textbox rather than an audio component: gen_tts returns the audio
	    # as a base64 data-URI string.
	    audio_out = gr.Textbox()

	    # Order must match the positional parameters of gen_tts.
	    inputs = [secret_token, input_text, description]
	    outputs = [audio_out]
	    run_button.click(fn=gen_tts, inputs=inputs, outputs=outputs, queue=True)

	# Enable the request queue, then start the server.
	block.queue()
	block.launch(share=True)