TTS-GPT-SoVITS

Sleeping

App Files Files Community

TTS-GPT-SoVITS / main.py

lijiacai

add stt

3a17e04 about 1 year ago

raw

history blame contribute delete

4.16 kB

	from fastapi import FastAPI, Body, File, Form, UploadFile, Response, Request
	from fastapi.responses import FileResponse, StreamingResponse
	from fastapi.staticfiles import StaticFiles
	import gradio as gr
	import os
	from enum import Enum
	import uvicorn
	import time
	import tempfile
	try:
	    from model import text_to_speech, speech_to_text
	except Exception:
	    # The real model could not be loaded (missing package, weights, ...).
	    # Fall back to lightweight stubs so the API and demo UI can still start,
	    # but say so loudly instead of silently serving canned results.
	    # `Exception` (not a bare `except`) lets Ctrl-C / SystemExit propagate.
	    import logging

	    logging.getLogger(__name__).warning(
	        "could not import `model`; using stub text_to_speech/speech_to_text",
	        exc_info=True,
	    )

	    def text_to_speech(voice, text):
	        """Stub: return the path of a pre-recorded sample for ``voice``.

	        ``text`` is ignored. ``voice`` may be a plain string or an enum
	        member; for an enum the member's ``value`` is used, because
	        f-string formatting of a ``str``-mixin enum yields
	        ``ClassName.member`` on Python 3.11+.
	        """
	        voice_name = getattr(voice, "value", voice)
	        return f"static/zh/{voice_name}.mp3"

	    def speech_to_text(voice: str):
	        """Stub: ignore the audio path and return a fixed ``(text, "")`` pair.

	        Callers in this file only use the first element of the pair.
	        """
	        return "文本测试", ""

	# Markdown blurb (in Chinese) with links to the API docs and the demo page
	# followed by a feature list. It is passed to FastAPI as the API description
	# and is also rendered at the top of the Gradio demo via `Demo.description`.
	description = """
	## [接口文档](/docs)
	## [效果演示](/)
	## 功能：
	- 零样本文本到语音（TTS）：输入 5 秒的声音样本，即刻体验文本到语音转换。

	- 少样本 TTS：仅需 1 分钟的训练数据即可微调模型，提升声音相似度和真实感。

	- 跨语言支持：支持与训练数据集不同语言的推理，目前支持英语、日语和中文。

	- 支持语音转文本/文本转语音

	"""
	# The ASGI application; routes, middleware and mounts below attach to it.
	app = FastAPI(title="text to speech", description=description)


	@app.middleware("http")
	async def add_process_time_header(request: Request, call_next):
	    """Attach an ``X-Process-Time`` header (seconds) to every HTTP response.

	    Args:
	        request: The incoming request.
	        call_next: Callable that forwards the request down the stack and
	            returns the response.

	    Returns:
	        The downstream response with the timing header added.
	    """
	    # perf_counter() is monotonic, so the measured duration cannot go
	    # negative or jump if the system clock is adjusted mid-request.
	    started = time.perf_counter()
	    response = await call_next(request)
	    elapsed = time.perf_counter() - started
	    response.headers["X-Process-Time"] = str(elapsed)
	    return response

	# Serve files from the local "static" directory under the /static URL prefix.
	# NOTE(review): "static" is a relative path, so this presumably depends on the
	# process's working directory — confirm how the app is launched.
	app.mount("/static", StaticFiles(directory="static"), name="static")


	class Language(str, Enum):
	    """String-valued enum of language display names.

	    Members are also ``str`` instances, so they compare equal to their
	    values. Not referenced elsewhere in the visible part of this file.
	    """

	    en = "English"
	    zh = "中文"


	class DefaultVoice(str, Enum):
	    """String-valued enum of the built-in voices accepted by ``/tts``.

	    Members are also ``str`` instances, so they compare equal to their
	    values.
	    """

	    voice1 = "新闻小说主播-女士"
	    voice2 = "温柔女士"


	@app.post("/tts")
	async def tts(
	    voice: DefaultVoice = Form(DefaultVoice.voice1),
	    text: str = Form(..., description="转换文本"),
	):
	    """Synthesize ``text`` with the chosen voice and return it as a download.

	    Args:
	        voice: One of the built-in voices. Defaults to the first one (the
	            previous default string was not a valid ``DefaultVoice`` value).
	        text: The text to convert to speech.

	    Returns:
	        A ``FileResponse`` that streams the generated audio file as an
	        attachment.
	    """
	    # Hand the model the plain string value rather than the enum member.
	    audio_path = os.fspath(
	        text_to_speech(voice=DefaultVoice(voice).value, text=text)
	    )

	    # Label MP3 output correctly; everything else keeps the original audio/wav.
	    if audio_path.lower().endswith(".mp3"):
	        media_type = "audio/mpeg"
	    else:
	        media_type = "audio/wav"

	    # FileResponse streams from disk instead of loading the whole file into
	    # memory, and it encodes a non-ASCII download name safely. Building the
	    # Content-Disposition header by hand failed for Chinese voice names,
	    # because header values must be latin-1 encodable. Only the base name is
	    # exposed, not the server-side path.
	    return FileResponse(
	        audio_path,
	        media_type=media_type,
	        filename=os.path.basename(audio_path),
	    )


	@app.post("/stt")
	async def stt(
	    voice: UploadFile = File(...),
	):
	    """Transcribe an uploaded audio file.

	    The handler was previously also named ``tts``, which rebound the
	    module-level name of the ``/tts`` handler; the route path is unchanged.

	    Args:
	        voice: The uploaded audio file.

	    Returns:
	        ``{"text": <transcription>}``.
	    """
	    contents = await voice.read()

	    # Keep the upload's extension on the temp file so a decoder that detects
	    # the format from the file name can do so. The name is client-supplied,
	    # so only a plain alphanumeric extension is accepted.
	    ext = os.path.splitext(voice.filename or "")[1]
	    suffix = ext if ext[1:].isalnum() else ""

	    with tempfile.NamedTemporaryFile(suffix=suffix) as tmp:
	        tmp.write(contents)
	        # Flush so the bytes are on disk before the file is reopened by path.
	        tmp.flush()
	        text, _ = speech_to_text(tmp.name)
	    return {"text": text}


	class Demo:
	    """Gradio demo page with a text-to-speech and a speech-to-text panel.

	    After construction the ``gr.Blocks`` layout is available as ``self.page``.
	    """

	    title = "text to speech"
	    # Markdown shown at the top of the page (the module-level description).
	    description = description

	    def __init__(self):
	        """Build the layout and wire the two buttons to their handlers."""
	        # Offer exactly the voices the /tts endpoint accepts.
	        voices = [voice.value for voice in DefaultVoice]

	        with gr.Blocks(theme=gr.themes.Soft()) as self.page:
	            with gr.Row():
	                gr.Markdown(value=self.description)
	            with gr.Row():
	                # Left column: text -> speech.
	                with gr.Column(scale=2):
	                    with gr.Row():
	                        text_tts = gr.Textbox(label="请输入需要转换的文本")
	                    with gr.Row():
	                        # Preselect the first voice so the handler never
	                        # receives None when the user does not pick one.
	                        voice_tts = gr.Dropdown(
	                            voices,
	                            value=voices[0],
	                            label="选择音色")
	                    with gr.Row():
	                        audio_tts = gr.Audio(
	                            label="转换后的音频", type="filepath", scale=3)
	                    with gr.Row():
	                        button_tts = gr.Button(value="文本转语音")
	                # Right column: speech -> text.
	                with gr.Column(scale=2):
	                    audio_stt = gr.Audio(
	                        label="上传语音", type="filepath", scale=3)
	                    with gr.Row():
	                        # Label fixed: this button runs speech-to-text, but
	                        # was labelled "文本转语音" (text to speech).
	                        button_stt = gr.Button(value="语音转文本")
	                    text_stt = gr.Text(label="结果")
	            # Events
	            button_tts.click(self.click_run_button_tts, inputs=[
	                voice_tts, text_tts], outputs=[audio_tts])
	            button_stt.click(self.click_run_button_stt, inputs=[
	                audio_stt], outputs=[text_stt])

	    def click_run_button_tts(self, voice, text):
	        """Synthesize ``text`` with ``voice`` and return the audio file path."""
	        wav_path = text_to_speech(voice=voice, text=text)
	        return wav_path

	    def click_run_button_stt(self, audio):
	        """Transcribe the audio file at ``audio`` and return the text.

	        Returns an empty string when no audio has been provided.
	        """
	        if not audio:
	            return ""
	        text, _ = speech_to_text(voice=audio)
	        return text


	# Mount the Gradio demo at the site root. This is done after the API routes
	# and the /static mount have been registered on `app`.
	app = gr.mount_gradio_app(app, Demo().page, path="/")

	if __name__ == '__main__':
	    # Pass the app object rather than the "main:app" import string. With the
	    # string, uvicorn imports this module a second time under the name
	    # `main`, building the app and demo (and loading the model) twice. The
	    # string also only works when the file is named main.py.
	    uvicorn.run(
	        app,
	        host="0.0.0.0",
	        port=int(os.environ.get("PORT", 7860)),
	    )