feat: add 30s cooldown for creating voice
Browse files
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import time
|
| 2 |
import os
|
| 3 |
import logging
|
|
@@ -47,7 +48,7 @@ def update_all_characters(lang, category):
|
|
| 47 |
selected_index=None,
|
| 48 |
),
|
| 49 |
category,
|
| 50 |
-
gr.update(choices=category, value=category[0])
|
| 51 |
)
|
| 52 |
|
| 53 |
|
|
@@ -77,7 +78,7 @@ def get_characters(
|
|
| 77 |
|
| 78 |
# 处理头像数据
|
| 79 |
import pickle
|
| 80 |
-
|
| 81 |
def process_avatar(avatar):
|
| 82 |
if not isinstance(avatar, str):
|
| 83 |
try:
|
|
@@ -85,8 +86,8 @@ def get_characters(
|
|
| 85 |
except:
|
| 86 |
return avatar
|
| 87 |
return avatar
|
| 88 |
-
|
| 89 |
-
unique_characters[
|
| 90 |
|
| 91 |
# 应用分页
|
| 92 |
start_index = (page - 1) * per_page
|
|
@@ -334,15 +335,45 @@ async def update_prompt_audio(current_character):
|
|
| 334 |
else:
|
| 335 |
return None
|
| 336 |
|
| 337 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 338 |
updates = {}
|
| 339 |
-
for field, value in [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
if field in ["avatar", "audio_data"]:
|
| 341 |
if value is None or (isinstance(value, np.ndarray) and value.size == 0):
|
| 342 |
updates[field] = gr.update(value=None)
|
| 343 |
elif value == "":
|
| 344 |
updates[field] = gr.update(value="")
|
| 345 |
-
|
| 346 |
if updates:
|
| 347 |
if lang == "zh":
|
| 348 |
gr.Warning("请填写完整信息")
|
|
@@ -352,7 +383,10 @@ async def create_voice(avatar, name, emotion, tags, gender, audio_data, lang):
|
|
| 352 |
gr.Warning("すべての情報を入力してください")
|
| 353 |
elif lang == "ko":
|
| 354 |
gr.Warning("모든 정보를 입력하세요")
|
| 355 |
-
return tuple(
|
|
|
|
|
|
|
|
|
|
| 356 |
duration = len(audio_data[1]) / audio_data[0]
|
| 357 |
if duration < 3.2 or duration > 8:
|
| 358 |
if lang == "zh":
|
|
@@ -363,17 +397,25 @@ async def create_voice(avatar, name, emotion, tags, gender, audio_data, lang):
|
|
| 363 |
gr.Warning("音声の長さは3.2秒から8秒の間にしてください")
|
| 364 |
elif lang == "ko":
|
| 365 |
gr.Warning("음성 길이는 3.2초에서 8초 사이로 설정해야 합니다")
|
| 366 |
-
return avatar, name, emotion, tags, gender, audio_data
|
| 367 |
await generate_voice(avatar, name, emotion, tags, gender, audio_data, lang)
|
| 368 |
if lang == "zh":
|
| 369 |
-
gr.Info("创建成功,您创建的语音将在审核后上线")
|
| 370 |
elif lang == "en":
|
| 371 |
-
gr.Info(
|
|
|
|
|
|
|
|
|
|
| 372 |
elif lang == "ja":
|
| 373 |
-
gr.Info(
|
|
|
|
|
|
|
| 374 |
elif lang == "ko":
|
| 375 |
-
gr.Info(
|
| 376 |
-
|
|
|
|
|
|
|
|
|
|
| 377 |
|
| 378 |
head = """
|
| 379 |
<title>Free Online Text to Speech (TTS) | Convert Text to Audio</title>
|
|
@@ -500,19 +542,29 @@ with gr.Blocks(title="Online Free TTS", theme=gr.themes.Soft(), head=head) as de
|
|
| 500 |
pass
|
| 501 |
|
| 502 |
with gr.Tab(gettext("Create Voice")):
|
|
|
|
| 503 |
gr.Markdown(gettext("Note"))
|
| 504 |
with gr.Row():
|
| 505 |
-
avatar = gr.Image(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 506 |
with gr.Column():
|
| 507 |
with gr.Row():
|
| 508 |
name = gr.Textbox(
|
| 509 |
label=gettext("Name"), interactive=True, max_lines=1
|
| 510 |
)
|
| 511 |
emotion = gr.Textbox(
|
| 512 |
-
label=gettext("Emotion\n(Happy, Sad, Angry)"),
|
|
|
|
|
|
|
| 513 |
)
|
| 514 |
tags = gr.Textbox(
|
| 515 |
-
label=gettext("Tags\n(Genshin, Cute, Girl, Boy, etc.)"),
|
|
|
|
|
|
|
| 516 |
)
|
| 517 |
gender = gr.Dropdown(
|
| 518 |
label=gettext("Gender"),
|
|
@@ -523,9 +575,14 @@ with gr.Blocks(title="Online Free TTS", theme=gr.themes.Soft(), head=head) as de
|
|
| 523 |
],
|
| 524 |
interactive=True,
|
| 525 |
)
|
| 526 |
-
audio_data = gr.Audio(
|
| 527 |
-
|
| 528 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
gr.Markdown(gettext(terms))
|
| 530 |
# -------------- 绑定事件 --------------
|
| 531 |
|
|
@@ -534,8 +591,12 @@ with gr.Blocks(title="Online Free TTS", theme=gr.themes.Soft(), head=head) as de
|
|
| 534 |
inputs=[lang, category],
|
| 535 |
outputs=[all_characters_state, characters, gallery, category, kind],
|
| 536 |
)
|
| 537 |
-
|
| 538 |
-
demo.load(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
|
| 540 |
add_voice_button.click(
|
| 541 |
fn=add_new_voice,
|
|
@@ -603,8 +664,25 @@ with gr.Blocks(title="Online Free TTS", theme=gr.themes.Soft(), head=head) as de
|
|
| 603 |
|
| 604 |
create_button.click(
|
| 605 |
fn=create_voice,
|
| 606 |
-
inputs=[
|
| 607 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
)
|
| 609 |
|
| 610 |
|
|
|
|
| 1 |
+
import sys
|
| 2 |
import time
|
| 3 |
import os
|
| 4 |
import logging
|
|
|
|
| 48 |
selected_index=None,
|
| 49 |
),
|
| 50 |
category,
|
| 51 |
+
gr.update(choices=category, value=category[0]),
|
| 52 |
)
|
| 53 |
|
| 54 |
|
|
|
|
| 78 |
|
| 79 |
# 处理头像数据
|
| 80 |
import pickle
|
| 81 |
+
|
| 82 |
def process_avatar(avatar):
|
| 83 |
if not isinstance(avatar, str):
|
| 84 |
try:
|
|
|
|
| 86 |
except:
|
| 87 |
return avatar
|
| 88 |
return avatar
|
| 89 |
+
|
| 90 |
+
unique_characters["头像"] = unique_characters["头像"].apply(process_avatar)
|
| 91 |
|
| 92 |
# 应用分页
|
| 93 |
start_index = (page - 1) * per_page
|
|
|
|
| 335 |
else:
|
| 336 |
return None
|
| 337 |
|
| 338 |
+
|
| 339 |
+
async def create_voice(
|
| 340 |
+
avatar, name, emotion, tags, gender, audio_data, lang, since_last_update
|
| 341 |
+
):
|
| 342 |
+
if since_last_update is None:
|
| 343 |
+
since_last_update = time.time()
|
| 344 |
+
elif time.time() - since_last_update < 30:
|
| 345 |
+
if lang == "zh":
|
| 346 |
+
gr.Warning(
|
| 347 |
+
f"已提交上个创建请求,请在{30 - (time.time() - since_last_update):.1f}秒后提交新的角色"
|
| 348 |
+
)
|
| 349 |
+
elif lang == "en":
|
| 350 |
+
gr.Warning(
|
| 351 |
+
f"The last creation request has been submitted. Please try to create a new character after {30 - (time.time() - since_last_update):.1f} seconds"
|
| 352 |
+
)
|
| 353 |
+
elif lang == "ja":
|
| 354 |
+
gr.Warning(
|
| 355 |
+
f"前回の作成リクエストが送信されました。{30 - (time.time() - since_last_update):.1f}秒後に新しいキャラクターを作成してください"
|
| 356 |
+
)
|
| 357 |
+
elif lang == "ko":
|
| 358 |
+
gr.Warning(
|
| 359 |
+
f"이전 생성 요청이 제출되었습니다. {30 - (time.time() - since_last_update):.1f}초 후에 새 캐릭터를 만들어주세요"
|
| 360 |
+
)
|
| 361 |
+
return avatar, name, emotion, tags, gender, audio_data, since_last_update
|
| 362 |
updates = {}
|
| 363 |
+
for field, value in [
|
| 364 |
+
("avatar", avatar),
|
| 365 |
+
("name", name),
|
| 366 |
+
("emotion", emotion),
|
| 367 |
+
("tags", tags),
|
| 368 |
+
("gender", gender),
|
| 369 |
+
("audio_data", audio_data),
|
| 370 |
+
]:
|
| 371 |
if field in ["avatar", "audio_data"]:
|
| 372 |
if value is None or (isinstance(value, np.ndarray) and value.size == 0):
|
| 373 |
updates[field] = gr.update(value=None)
|
| 374 |
elif value == "":
|
| 375 |
updates[field] = gr.update(value="")
|
| 376 |
+
|
| 377 |
if updates:
|
| 378 |
if lang == "zh":
|
| 379 |
gr.Warning("请填写完整信息")
|
|
|
|
| 383 |
gr.Warning("すべての情報を入力してください")
|
| 384 |
elif lang == "ko":
|
| 385 |
gr.Warning("모든 정보를 입력하세요")
|
| 386 |
+
return tuple(
|
| 387 |
+
[updates.get(field, gr.update())
|
| 388 |
+
for field in ["avatar", "name", "emotion", "tags", "gender", "audio_data"]] + [since_last_update]
|
| 389 |
+
)
|
| 390 |
duration = len(audio_data[1]) / audio_data[0]
|
| 391 |
if duration < 3.2 or duration > 8:
|
| 392 |
if lang == "zh":
|
|
|
|
| 397 |
gr.Warning("音声の長さは3.2秒から8秒の間にしてください")
|
| 398 |
elif lang == "ko":
|
| 399 |
gr.Warning("음성 길이는 3.2초에서 8초 사이로 설정해야 합니다")
|
| 400 |
+
return avatar, name, emotion, tags, gender, audio_data, since_last_update
|
| 401 |
await generate_voice(avatar, name, emotion, tags, gender, audio_data, lang)
|
| 402 |
if lang == "zh":
|
| 403 |
+
gr.Info("创建成功,您创建的语音将在审核后上线", duration=20)
|
| 404 |
elif lang == "en":
|
| 405 |
+
gr.Info(
|
| 406 |
+
"Creation successful. The voice you created will be available after review.",
|
| 407 |
+
duration=20,
|
| 408 |
+
)
|
| 409 |
elif lang == "ja":
|
| 410 |
+
gr.Info(
|
| 411 |
+
"作成が完了しました。作成された音声は審査後に公開されます。", duration=20
|
| 412 |
+
)
|
| 413 |
elif lang == "ko":
|
| 414 |
+
gr.Info(
|
| 415 |
+
"생성 완료. 귀하가 생성한 음성은 검토 후 공개될 예정입니다.", duration=20
|
| 416 |
+
)
|
| 417 |
+
return tuple([gr.update(value=None) for _ in range(6)] + [since_last_update]) # 6个更新项
|
| 418 |
+
|
| 419 |
|
| 420 |
head = """
|
| 421 |
<title>Free Online Text to Speech (TTS) | Convert Text to Audio</title>
|
|
|
|
| 542 |
pass
|
| 543 |
|
| 544 |
with gr.Tab(gettext("Create Voice")):
|
| 545 |
+
since_last_update = gr.State(None)
|
| 546 |
gr.Markdown(gettext("Note"))
|
| 547 |
with gr.Row():
|
| 548 |
+
avatar = gr.Image(
|
| 549 |
+
label=gettext("Avatar"),
|
| 550 |
+
interactive=True,
|
| 551 |
+
type="pil",
|
| 552 |
+
image_mode="RGBA",
|
| 553 |
+
)
|
| 554 |
with gr.Column():
|
| 555 |
with gr.Row():
|
| 556 |
name = gr.Textbox(
|
| 557 |
label=gettext("Name"), interactive=True, max_lines=1
|
| 558 |
)
|
| 559 |
emotion = gr.Textbox(
|
| 560 |
+
label=gettext("Emotion\n(Happy, Sad, Angry)"),
|
| 561 |
+
interactive=True,
|
| 562 |
+
max_lines=1,
|
| 563 |
)
|
| 564 |
tags = gr.Textbox(
|
| 565 |
+
label=gettext("Tags\n(Genshin, Cute, Girl, Boy, etc.)"),
|
| 566 |
+
interactive=True,
|
| 567 |
+
max_lines=1,
|
| 568 |
)
|
| 569 |
gender = gr.Dropdown(
|
| 570 |
label=gettext("Gender"),
|
|
|
|
| 575 |
],
|
| 576 |
interactive=True,
|
| 577 |
)
|
| 578 |
+
audio_data = gr.Audio(
|
| 579 |
+
label=gettext("Prompt Audio(min 3.2s, max 8s)"),
|
| 580 |
+
interactive=True,
|
| 581 |
+
)
|
| 582 |
+
create_button = gr.Button(
|
| 583 |
+
gettext("Create Voice"), variant="primary"
|
| 584 |
+
)
|
| 585 |
+
|
| 586 |
gr.Markdown(gettext(terms))
|
| 587 |
# -------------- 绑定事件 --------------
|
| 588 |
|
|
|
|
| 591 |
inputs=[lang, category],
|
| 592 |
outputs=[all_characters_state, characters, gallery, category, kind],
|
| 593 |
)
|
| 594 |
+
|
| 595 |
+
demo.load(
|
| 596 |
+
update_all_characters,
|
| 597 |
+
inputs=[lang, category],
|
| 598 |
+
outputs=[all_characters_state, characters, gallery, category, kind],
|
| 599 |
+
)
|
| 600 |
|
| 601 |
add_voice_button.click(
|
| 602 |
fn=add_new_voice,
|
|
|
|
| 664 |
|
| 665 |
create_button.click(
|
| 666 |
fn=create_voice,
|
| 667 |
+
inputs=[
|
| 668 |
+
avatar,
|
| 669 |
+
name,
|
| 670 |
+
emotion,
|
| 671 |
+
tags,
|
| 672 |
+
gender,
|
| 673 |
+
audio_data,
|
| 674 |
+
lang,
|
| 675 |
+
since_last_update,
|
| 676 |
+
],
|
| 677 |
+
outputs=[
|
| 678 |
+
avatar,
|
| 679 |
+
name,
|
| 680 |
+
emotion,
|
| 681 |
+
tags,
|
| 682 |
+
gender,
|
| 683 |
+
audio_data,
|
| 684 |
+
since_last_update,
|
| 685 |
+
],
|
| 686 |
)
|
| 687 |
|
| 688 |
|