# Page banner captured together with the source ("Spaces: Sleeping"); it is not part of the program.
import logging
import os
import tempfile
import time
from enum import Enum

import gradio as gr
import uvicorn
from fastapi import Body, FastAPI, File, Form, Request, Response, UploadFile
from fastapi.responses import FileResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
try:
    from model import text_to_speech, speech_to_text
except Exception:
    # The real model backend could not be loaded. Keep the service usable with
    # placeholder implementations, but record why, so a broken install is not
    # silently mistaken for a working one. `Exception` (rather than a bare
    # `except`) lets KeyboardInterrupt / SystemExit propagate.
    logging.getLogger(__name__).warning(
        "model backend unavailable; using placeholder "
        "text_to_speech/speech_to_text",
        exc_info=True,
    )

    def text_to_speech(voice, text):
        """Placeholder TTS: return the path of a canned clip for *voice*.

        *text* is ignored; the path has the form ``static/zh/<voice>.mp3``.
        """
        return f"static/zh/{voice}.mp3"

    def speech_to_text(voice: str):
        """Placeholder STT: return a fixed ``(text, "")`` pair.

        *voice* (the audio file path) is ignored.
        """
        return "文本测试", ""
# Markdown blurb passed to FastAPI as the API description; the Demo class in
# this file also renders it at the top of the Gradio page.
description = """
## [接口文档](/docs)
## [效果演示](/)
## 功能:
- 零样本文本到语音(TTS): 输入 5 秒的声音样本,即刻体验文本到语音转换。
- 少样本 TTS: 仅需 1 分钟的训练数据即可微调模型,提升声音相似度和真实感。
- 跨语言支持: 支持与训练数据集不同语言的推理,目前支持英语、日语和中文。
- 支持语音转文本/文本转语音
"""

# The ASGI application object that everything below attaches to.
app = FastAPI(title="text to speech", description=description)
@app.middleware("http")
async def add_process_time_header(request: Request, call_next):
    """Add an ``X-Process-Time`` header with the request handling time.

    Args:
        request: The incoming request.
        call_next: Callable that forwards the request down the stack and
            returns the response.

    Returns:
        The downstream response with ``X-Process-Time`` set to the elapsed
        time in seconds, formatted with ``str``.
    """
    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump when the wall clock is adjusted.
    started = time.perf_counter()
    response = await call_next(request)
    elapsed = time.perf_counter() - started
    response.headers["X-Process-Time"] = str(elapsed)
    return response
# Expose the local "static" directory (resolved relative to the working
# directory) under the /static URL prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")
class Language(str, Enum):
    """Language options, keyed by short code with a display name as value.

    Members are also ``str`` instances, so they compare equal to their value.
    """

    en = "English"
    zh = "中文"
class DefaultVoice(str, Enum):
    """Built-in voices that can be selected for text-to-speech.

    Each value is the voice's display name. Members are also ``str``
    instances, so they compare equal to their value.
    """

    voice1 = "新闻小说主播-女士"
    voice2 = "温柔女士"
# NOTE(review): no route registration was visible for this handler; the
# "/tts" path is an assumption - confirm against existing clients.
@app.post("/tts")
async def tts(
    voice: DefaultVoice = Form(DefaultVoice.voice1),
    text: str = Form(..., description="转换文本"),
):
    """Convert *text* to speech and return the audio file as a download.

    Args:
        voice: One of the ``DefaultVoice`` options. Defaults to the first
            voice; the previous default string was not a valid member.
        text: The text to synthesize.

    Returns:
        A ``FileResponse`` serving the file produced by ``text_to_speech``
        as an attachment.
    """
    # Pass the plain string value: that is what the Gradio demo passes, and
    # newer Python versions format str-mixin enum members by name
    # ("DefaultVoice.voice1") rather than by value inside f-strings.
    wav_path = text_to_speech(voice=DefaultVoice(voice).value, text=text)

    # Keep the original "audio/wav" for .wav output; for anything else (the
    # placeholder backend returns .mp3) let FileResponse guess from the name.
    media_type = "audio/wav" if wav_path.lower().endswith(".wav") else None

    # FileResponse sends only the base name in Content-Disposition and encodes
    # non-ASCII names, which a hand-built header with the raw path did not.
    return FileResponse(
        wav_path,
        media_type=media_type,
        filename=os.path.basename(wav_path),
    )
# NOTE(review): no route registration was visible for this handler; the
# "/stt" path is an assumption - confirm against existing clients. The
# function was also named `tts`, which silently replaced the text-to-speech
# handler defined earlier in this file; it is renamed to match what it does.
@app.post("/stt")
async def stt(
    voice: UploadFile = File(...),
):
    """Transcribe an uploaded audio file.

    Args:
        voice: The uploaded audio file.

    Returns:
        ``{"text": <transcript>}`` where the transcript is the first element
        of the pair returned by ``speech_to_text``.
    """
    contents = await voice.read()

    # Keep the upload's extension in case the backend identifies the audio
    # format from the file name.
    suffix = os.path.splitext(voice.filename or "")[1]

    # Close the file before handing its path to the backend: reopening a
    # still-open NamedTemporaryFile by name does not work on every platform.
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    try:
        with tmp:
            tmp.write(contents)
        text, _ = speech_to_text(tmp.name)
    finally:
        os.unlink(tmp.name)
    return {"text": text}
class Demo:
    """Gradio demo page with a text-to-speech and a speech-to-text panel.

    The assembled ``gr.Blocks`` layout is available as ``self.page``.
    """

    title = "text to speech"
    description = description

    def __init__(self):
        """Build the page layout and wire the two buttons to their handlers."""
        # Derive the choices from DefaultVoice so the UI and the API cannot
        # drift apart.
        voice_choices = [voice.value for voice in DefaultVoice]

        with gr.Blocks(theme=gr.themes.Soft()) as self.page:
            with gr.Row():
                gr.Markdown(value=self.description)
            with gr.Row():
                # Left column: text -> speech.
                with gr.Column(scale=2):
                    with gr.Row():
                        text_tts = gr.Textbox(label="请输入需要转换的文本")
                    with gr.Row():
                        voice_tts = gr.Dropdown(
                            voice_choices,
                            value=voice_choices[0],
                            label="选择音色",
                        )
                    with gr.Row():
                        audio_tts = gr.Audio(
                            label="转换后的音频", type="filepath", scale=3)
                    with gr.Row():
                        button_tts = gr.Button(value="文本转语音")
                # Right column: speech -> text.
                with gr.Column(scale=2):
                    audio_stt = gr.Audio(
                        label="上传语音", type="filepath", scale=3)
                    with gr.Row():
                        # This button previously reused the text-to-speech
                        # label although it triggers speech-to-text.
                        button_stt = gr.Button(value="语音转文本")
                    text_stt = gr.Text(label="结果")

            # Event wiring.
            button_tts.click(
                self.click_run_button_tts,
                inputs=[voice_tts, text_tts],
                outputs=[audio_tts],
            )
            button_stt.click(
                self.click_run_button_stt,
                inputs=[audio_stt],
                outputs=[text_stt],
            )

    def click_run_button_tts(self, voice, text):
        """Synthesize *text* with *voice* for the audio output component.

        Returns:
            The audio file path from ``text_to_speech``, or ``None`` when no
            voice is selected or the text is empty/whitespace.
        """
        if not voice or not text or not text.strip():
            return None
        return text_to_speech(voice=voice, text=text)

    def click_run_button_stt(self, audio):
        """Transcribe the audio file at path *audio*.

        Returns:
            The transcript from ``speech_to_text``, or ``""`` when no audio
            was provided.
        """
        if not audio:
            return ""
        text, _ = speech_to_text(voice=audio)
        return text
# Attach the Gradio demo UI to the FastAPI app at the site root.
gr.mount_gradio_app(app, Demo().page, path="/")

if __name__ == '__main__':
    # Pass the app object itself: an import string such as "main:app" makes
    # uvicorn import this module a second time (rebuilding the demo and
    # reloading the model) and only works when the file is named main.py.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
    )