Spaces:
Running
on
Zero
Running
on
Zero
Update
Browse files
app.py
CHANGED
@@ -44,42 +44,56 @@ for v in CHOICES.values():
|
|
44 |
|
45 |
@spaces.GPU(duration=30)
|
46 |
def generate(text: str, voice: str = "af_heart", speed: float = 1.0) -> tuple[tuple[int, np.ndarray], str]:
|
47 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
Available voices:
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
|
79 |
Args:
|
80 |
-
text:
|
81 |
-
voice:
|
82 |
-
speed:
|
83 |
|
84 |
Returns:
|
85 |
A tuple containing the audio and the tokens used to generate the audio.
|
|
|
44 |
|
45 |
@spaces.GPU(duration=30)
|
46 |
def generate(text: str, voice: str = "af_heart", speed: float = 1.0) -> tuple[tuple[int, np.ndarray], str]:
|
47 |
+
"""Synthesizes speech from English text using the Kokoro TTS model.
|
48 |
+
|
49 |
+
Note:
|
50 |
+
This model supports only English input text.
|
51 |
+
|
52 |
+
Voice Selection:
|
53 |
+
- The `voice` parameter specifies the speaker's characteristics and should follow the naming convention:
|
54 |
+
`<language/accent><gender>_<voice_name>`
|
55 |
+
- `<language/accent>`:
|
56 |
+
- 'a' for American English
|
57 |
+
- 'b' for British English
|
58 |
+
- `<gender>`:
|
59 |
+
- 'f' for female
|
60 |
+
- 'm' for male
|
61 |
+
- Example: 'af_heart' indicates an American English female voice named Heart.
|
62 |
|
63 |
Available voices:
|
64 |
+
- af_heart
|
65 |
+
- af_bella
|
66 |
+
- af_nicole
|
67 |
+
- af_aoede
|
68 |
+
- af_kore
|
69 |
+
- af_sarah
|
70 |
+
- af_nova
|
71 |
+
- af_sky
|
72 |
+
- af_alloy
|
73 |
+
- af_jessica
|
74 |
+
- af_river
|
75 |
+
- am_michael
|
76 |
+
- am_fenrir
|
77 |
+
- am_puck
|
78 |
+
- am_echo
|
79 |
+
- am_eric
|
80 |
+
- am_liam
|
81 |
+
- am_onyx
|
82 |
+
- am_santa
|
83 |
+
- am_adam
|
84 |
+
- bf_emma
|
85 |
+
- bf_isabella
|
86 |
+
- bf_alice
|
87 |
+
- bf_lily
|
88 |
+
- bm_george
|
89 |
+
- bm_fable
|
90 |
+
- bm_lewis
|
91 |
+
- bm_daniel
|
92 |
|
93 |
Args:
|
94 |
+
text: Input text to be synthesized. Only English text is supported. Non-English input may result in errors or mispronunciations.
|
95 |
+
voice: Identifier for the voice to be used in synthesis. Defaults to "af_heart".
|
96 |
+
speed: Playback speed multiplier. A value of 1.0 means normal speed; values above 1.0 speed up the speech and values below 1.0 slow it down. Defaults to 1.0.
|
97 |
|
98 |
Returns:
|
99 |
A tuple containing the audio and the tokens used to generate the audio.
|