hysts HF Staff committed on
Commit
8a49e4a
·
1 Parent(s): 691e7fc
Files changed (1) hide show
  1. app.py +46 -32
app.py CHANGED
@@ -44,42 +44,56 @@ for v in CHOICES.values():
44
 
45
  @spaces.GPU(duration=30)
46
  def generate(text: str, voice: str = "af_heart", speed: float = 1.0) -> tuple[tuple[int, np.ndarray], str]:
47
- """Generate audio from text using Kokoro TTS model.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  Available voices:
50
- - af_heart
51
- - af_bella
52
- - af_nicole
53
- - af_aoede
54
- - af_kore
55
- - af_sarah
56
- - af_nova
57
- - af_sky
58
- - af_alloy
59
- - af_jessica
60
- - af_river
61
- - am_michael
62
- - am_fenrir
63
- - am_puck
64
- - am_echo
65
- - am_eric
66
- - am_liam
67
- - am_onyx
68
- - am_santa
69
- - am_adam
70
- - bf_emma
71
- - bf_isabella
72
- - bf_alice
73
- - bf_lily
74
- - bm_george
75
- - bm_fable
76
- - bm_lewis
77
- - bm_daniel
78
 
79
  Args:
80
- text: The text to generate audio from.
81
- voice: The voice to use. Defaults to "af_heart".
82
- speed: The speed of the audio. Defaults to 1.0.
83
 
84
  Returns:
85
  A tuple containing the audio and the tokens used to generate the audio.
 
44
 
45
  @spaces.GPU(duration=30)
46
  def generate(text: str, voice: str = "af_heart", speed: float = 1.0) -> tuple[tuple[int, np.ndarray], str]:
47
+ """Synthesizes speech from English text using the Kokoro TTS model.
48
+
49
+ Note:
50
+ This model supports only English input texts.
51
+
52
+ Voice Selection:
53
+ - The `voice` parameter specifies the speaker's characteristics and should follow the naming convention:
54
+ `<language/accent><gender>_<voice_name>`
55
+ - `<language/accent>`:
56
+ - 'a' for American English
57
+ - 'b' for British English
58
+ - `<gender>`:
59
+ - 'f' for female
60
+ - 'm' for male
61
+ - Example: 'af_heart' indicates an American English female voice named Heart.
62
 
63
  Available voices:
64
+ - af_heart
65
+ - af_bella
66
+ - af_nicole
67
+ - af_aoede
68
+ - af_kore
69
+ - af_sarah
70
+ - af_nova
71
+ - af_sky
72
+ - af_alloy
73
+ - af_jessica
74
+ - af_river
75
+ - am_michael
76
+ - am_fenrir
77
+ - am_puck
78
+ - am_echo
79
+ - am_eric
80
+ - am_liam
81
+ - am_onyx
82
+ - am_santa
83
+ - am_adam
84
+ - bf_emma
85
+ - bf_isabella
86
+ - bf_alice
87
+ - bf_lily
88
+ - bm_george
89
+ - bm_fable
90
+ - bm_lewis
91
+ - bm_daniel
92
 
93
  Args:
94
+ text: Input text to be synthesized. Only English text is supported. Non-English input may result in errors or mispronunciations.
95
+ voice: Identifier for the voice to be used in synthesis. Defaults to "af_heart".
96
+ speed: Playback speed multiplier. A value of 1.0 means normal speed; values above 1.0 speed the speech up and values below 1.0 slow it down. Defaults to 1.0.
97
 
98
  Returns:
99
  A tuple containing the audio and the tokens used to generate the audio.