Spaces:
Running
Running
zach
committed on
Commit
·
d1ed6b1
1
Parent(s):
84c63d1
Add formatter (black) and format all code in src directory — using black default formatting configuration
Browse files
- requirements.txt +4 -0
- src/__init__.py +1 -1
- src/app.py +164 -111
- src/config.py +5 -5
- src/constants.py +27 -36
- src/integrations/__init__.py +1 -1
- src/integrations/anthropic_api.py +30 -22
- src/integrations/elevenlabs_api.py +32 -23
- src/integrations/hume_api.py +45 -29
- src/theme.py +33 -39
- src/types.py +4 -3
- src/utils.py +19 -15
requirements.txt
CHANGED
@@ -2,6 +2,7 @@ aiofiles==23.2.1
|
|
2 |
annotated-types==0.7.0
|
3 |
anthropic==0.45.2
|
4 |
anyio==4.8.0
|
|
|
5 |
certifi==2024.12.14
|
6 |
charset-normalizer==3.4.1
|
7 |
click==8.1.8
|
@@ -23,11 +24,14 @@ jiter==0.8.2
|
|
23 |
markdown-it-py==3.0.0
|
24 |
MarkupSafe==2.1.5
|
25 |
mdurl==0.1.2
|
|
|
26 |
numpy==2.2.2
|
27 |
orjson==3.10.15
|
28 |
packaging==24.2
|
29 |
pandas==2.2.3
|
|
|
30 |
pillow==11.1.0
|
|
|
31 |
pydantic==2.10.6
|
32 |
pydantic_core==2.27.2
|
33 |
pydub==0.25.1
|
|
|
2 |
annotated-types==0.7.0
|
3 |
anthropic==0.45.2
|
4 |
anyio==4.8.0
|
5 |
+
black==25.1.0
|
6 |
certifi==2024.12.14
|
7 |
charset-normalizer==3.4.1
|
8 |
click==8.1.8
|
|
|
24 |
markdown-it-py==3.0.0
|
25 |
MarkupSafe==2.1.5
|
26 |
mdurl==0.1.2
|
27 |
+
mypy-extensions==1.0.0
|
28 |
numpy==2.2.2
|
29 |
orjson==3.10.15
|
30 |
packaging==24.2
|
31 |
pandas==2.2.3
|
32 |
+
pathspec==0.12.1
|
33 |
pillow==11.1.0
|
34 |
+
platformdirs==4.3.6
|
35 |
pydantic==2.10.6
|
36 |
pydantic_core==2.27.2
|
37 |
pydub==0.25.1
|
src/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
"""
|
2 |
The root package. This package includes the Gradio application,
|
3 |
integrations with external APIs, and utility functions.
|
4 |
-
"""
|
|
|
1 |
"""
|
2 |
The root package. This package includes the Gradio application,
|
3 |
integrations with external APIs, and utility functions.
|
4 |
+
"""
|
src/app.py
CHANGED
@@ -21,10 +21,10 @@ from src.config import logger
|
|
21 |
from src.constants import (
|
22 |
ELEVENLABS,
|
23 |
HUME_AI,
|
24 |
-
OPTION_A,
|
25 |
OPTION_B,
|
26 |
-
PROMPT_MAX_LENGTH,
|
27 |
-
PROMPT_MIN_LENGTH,
|
28 |
SAMPLE_PROMPTS,
|
29 |
TROPHY_EMOJI,
|
30 |
UNKNOWN_PROVIDER,
|
@@ -45,7 +45,9 @@ from src.types import OptionMap
|
|
45 |
from src.utils import truncate_text, validate_prompt_length
|
46 |
|
47 |
|
48 |
-
def generate_text(
|
|
|
|
|
49 |
"""
|
50 |
Validates the prompt and generates text using Anthropic API.
|
51 |
|
@@ -56,29 +58,33 @@ def generate_text(prompt: str,) -> Tuple[Union[str, gr.update], gr.update]:
|
|
56 |
Tuple containing:
|
57 |
- The generated text (as a gr.update).
|
58 |
- An update for the generated text state.
|
59 |
-
|
60 |
Raises:
|
61 |
gr.Error: On validation or API errors.
|
62 |
"""
|
63 |
try:
|
64 |
validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
|
65 |
except ValueError as ve:
|
66 |
-
logger.warning(f
|
67 |
raise gr.Error(str(ve))
|
68 |
|
69 |
try:
|
70 |
generated_text = generate_text_with_claude(prompt)
|
71 |
-
logger.info(f
|
72 |
return gr.update(value=generated_text), generated_text
|
73 |
except AnthropicError as ae:
|
74 |
-
logger.error(f
|
75 |
-
raise gr.Error(
|
|
|
|
|
76 |
except Exception as e:
|
77 |
-
logger.error(f
|
78 |
-
raise gr.Error(
|
79 |
|
80 |
|
81 |
-
def text_to_speech(
|
|
|
|
|
82 |
"""
|
83 |
Synthesizes two text to speech outputs, loads the two audio players with the
|
84 |
output audio, and updates related UI state components.
|
@@ -95,46 +101,51 @@ def text_to_speech(prompt: str, text: str, generated_text_state: str) -> Tuple[g
|
|
95 |
- Update for second audio player
|
96 |
- A dictionary mapping options to providers
|
97 |
- The raw audio value for option B
|
98 |
-
|
99 |
Raises:
|
100 |
gr.Error: On API or unexpected errors.
|
101 |
"""
|
102 |
if not text:
|
103 |
-
logger.warning(
|
104 |
-
raise gr.Error(
|
105 |
|
106 |
# If not using generated text, then only compare Hume to Hume
|
107 |
-
compare_hume_with_elevenlabs = (text == generated_text_state) and (
|
108 |
-
|
|
|
|
|
109 |
# Pre-select two Hume voices pre-emptively in case we compare Hume to Hume to ensure we do not select the same voice twice.
|
110 |
hume_voice_a, hume_voice_b = get_random_hume_voice_names()
|
111 |
|
112 |
try:
|
113 |
with ThreadPoolExecutor(max_workers=2) as executor:
|
114 |
provider_a = HUME_AI
|
115 |
-
future_audio_a = executor.submit(
|
|
|
|
|
116 |
|
117 |
if compare_hume_with_elevenlabs:
|
118 |
provider_b = ELEVENLABS
|
119 |
future_audio_b = executor.submit(text_to_speech_with_elevenlabs, text)
|
120 |
else:
|
121 |
provider_b = HUME_AI
|
122 |
-
future_audio_b = executor.submit(
|
123 |
-
|
|
|
|
|
124 |
voice_a, audio_a = future_audio_a.result()
|
125 |
voice_b, audio_b = future_audio_b.result()
|
126 |
|
127 |
-
logger.info(
|
|
|
|
|
128 |
options = [
|
129 |
(audio_a, {"provider": provider_a, "voice": voice_a}),
|
130 |
-
(audio_b, {"provider": provider_b, "voice": voice_b})
|
131 |
]
|
132 |
random.shuffle(options)
|
133 |
option_a_audio, option_b_audio = options[0][0], options[1][0]
|
134 |
-
options_map: OptionMap = {
|
135 |
-
OPTION_A: options[0][1],
|
136 |
-
OPTION_B: options[1][1]
|
137 |
-
}
|
138 |
|
139 |
return (
|
140 |
gr.update(value=option_a_audio, visible=True, autoplay=True),
|
@@ -143,17 +154,23 @@ def text_to_speech(prompt: str, text: str, generated_text_state: str) -> Tuple[g
|
|
143 |
option_b_audio,
|
144 |
)
|
145 |
except ElevenLabsError as ee:
|
146 |
-
logger.error(f
|
147 |
-
raise gr.Error(
|
|
|
|
|
148 |
except HumeError as he:
|
149 |
-
logger.error(f
|
150 |
-
raise gr.Error(
|
|
|
|
|
151 |
except Exception as e:
|
152 |
-
logger.error(f
|
153 |
-
raise gr.Error(
|
154 |
|
155 |
|
156 |
-
def vote(
|
|
|
|
|
157 |
"""
|
158 |
Handles user voting.
|
159 |
|
@@ -178,17 +195,19 @@ def vote(vote_submitted: bool, option_map: OptionMap, selected_button: str) -> T
|
|
178 |
return gr.skip(), gr.skip(), gr.skip(), gr.skip()
|
179 |
|
180 |
option_a_selected = selected_button == VOTE_FOR_OPTION_A
|
181 |
-
selected_option, other_option = (
|
|
|
|
|
182 |
|
183 |
# Parse selected option details from options map
|
184 |
selected_details = option_map.get(selected_option, {})
|
185 |
-
selected_provider = selected_details.get(
|
186 |
-
selected_voice = selected_details.get(
|
187 |
|
188 |
# Parse other option details from options map
|
189 |
other_details = option_map.get(other_option, {})
|
190 |
-
other_provider = other_details.get(
|
191 |
-
other_voice = other_details.get(
|
192 |
|
193 |
# Build button labels, displaying the provider and voice name, appending the trophy emoji to the selected option.
|
194 |
selected_label = f"{selected_provider} | Voice: {selected_voice} {TROPHY_EMOJI}"
|
@@ -196,11 +215,17 @@ def vote(vote_submitted: bool, option_map: OptionMap, selected_button: str) -> T
|
|
196 |
|
197 |
return (
|
198 |
True,
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
else gr.update(value=
|
203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
)
|
205 |
|
206 |
|
@@ -221,8 +246,8 @@ def reset_ui() -> Tuple[gr.update, gr.update, gr.update, gr.update, None, None,
|
|
221 |
return (
|
222 |
gr.update(value=None),
|
223 |
gr.update(value=None),
|
224 |
-
gr.update(value=VOTE_FOR_OPTION_A, variant=
|
225 |
-
gr.update(value=VOTE_FOR_OPTION_B, variant=
|
226 |
None,
|
227 |
None,
|
228 |
False,
|
@@ -230,41 +255,45 @@ def reset_ui() -> Tuple[gr.update, gr.update, gr.update, gr.update, None, None,
|
|
230 |
|
231 |
|
232 |
def build_input_section() -> Tuple[gr.Markdown, gr.Dropdown, gr.Textbox, gr.Button]:
|
233 |
-
"""
|
234 |
-
instructions = gr.Markdown(
|
|
|
235 |
1. **Enter or Generate Text:** Type directly in the Text box, or optionally enter a Prompt, click "Generate text", and edit if needed.
|
236 |
2. **Synthesize Speech:** Click "Synthesize speech" to generate two audio outputs.
|
237 |
3. **Listen & Compare:** Playback both options (A & B) to hear the differences.
|
238 |
4. **Vote for Your Favorite:** Click "Vote for option A" or "Vote for option B" to choose the best one.
|
239 |
-
"""
|
|
|
240 |
sample_prompt_dropdown = gr.Dropdown(
|
241 |
choices=list(SAMPLE_PROMPTS.keys()),
|
242 |
-
label=
|
243 |
value=None,
|
244 |
interactive=True,
|
245 |
)
|
246 |
prompt_input = gr.Textbox(
|
247 |
-
label=
|
248 |
-
placeholder=
|
249 |
lines=2,
|
250 |
max_lines=2,
|
251 |
max_length=PROMPT_MAX_LENGTH,
|
252 |
show_copy_button=True,
|
253 |
)
|
254 |
-
generate_text_button = gr.Button(
|
255 |
return (
|
256 |
-
instructions,
|
257 |
-
sample_prompt_dropdown,
|
258 |
-
prompt_input,
|
259 |
generate_text_button,
|
260 |
)
|
261 |
|
262 |
|
263 |
-
def build_output_section() ->
|
264 |
-
|
|
|
|
|
265 |
text_input = gr.Textbox(
|
266 |
-
label=
|
267 |
-
placeholder=
|
268 |
interactive=True,
|
269 |
autoscroll=False,
|
270 |
lines=4,
|
@@ -272,19 +301,23 @@ def build_output_section() -> Tuple[gr.Textbox, gr.Button, gr.Audio, gr.Audio, g
|
|
272 |
max_length=PROMPT_MAX_LENGTH,
|
273 |
show_copy_button=True,
|
274 |
)
|
275 |
-
synthesize_speech_button = gr.Button(
|
276 |
with gr.Row(equal_height=True):
|
277 |
-
option_a_audio_player = gr.Audio(
|
278 |
-
|
|
|
|
|
|
|
|
|
279 |
with gr.Row(equal_height=True):
|
280 |
vote_button_a = gr.Button(VOTE_FOR_OPTION_A, interactive=False)
|
281 |
vote_button_b = gr.Button(VOTE_FOR_OPTION_B, interactive=False)
|
282 |
return (
|
283 |
-
text_input,
|
284 |
-
synthesize_speech_button,
|
285 |
-
option_a_audio_player,
|
286 |
-
option_b_audio_player,
|
287 |
-
vote_button_a,
|
288 |
vote_button_b,
|
289 |
)
|
290 |
|
@@ -298,39 +331,45 @@ def build_gradio_interface() -> gr.Blocks:
|
|
298 |
"""
|
299 |
custom_theme = CustomTheme()
|
300 |
with gr.Blocks(
|
301 |
-
title=
|
302 |
-
theme=custom_theme,
|
303 |
-
fill_width=True,
|
304 |
-
css_paths=
|
305 |
) as demo:
|
306 |
# Title
|
307 |
-
gr.Markdown(
|
308 |
|
309 |
# Build generate text section
|
310 |
-
(instructions,
|
311 |
-
|
312 |
-
|
313 |
-
generate_text_button) = build_input_section()
|
314 |
|
315 |
# Build synthesize speech section
|
316 |
-
(
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
328 |
|
329 |
# --- Register event handlers ---
|
330 |
|
331 |
# When a sample prompt is chosen, update the prompt textbox
|
332 |
sample_prompt_dropdown.change(
|
333 |
-
fn=lambda choice: SAMPLE_PROMPTS.get(choice,
|
334 |
inputs=[sample_prompt_dropdown],
|
335 |
outputs=[prompt_input],
|
336 |
)
|
@@ -342,7 +381,7 @@ def build_gradio_interface() -> gr.Blocks:
|
|
342 |
generate_text_button.click(
|
343 |
fn=lambda: gr.update(interactive=False),
|
344 |
inputs=[],
|
345 |
-
outputs=[generate_text_button]
|
346 |
).then(
|
347 |
fn=generate_text,
|
348 |
inputs=[prompt_input],
|
@@ -350,9 +389,9 @@ def build_gradio_interface() -> gr.Blocks:
|
|
350 |
).then(
|
351 |
fn=lambda: gr.update(interactive=True),
|
352 |
inputs=[],
|
353 |
-
outputs=[generate_text_button]
|
354 |
)
|
355 |
-
|
356 |
# Synthesize speech button click event handler chain:
|
357 |
# 1. Disable the "Synthesize speech" button
|
358 |
# 2. Reset UI state
|
@@ -361,48 +400,58 @@ def build_gradio_interface() -> gr.Blocks:
|
|
361 |
synthesize_speech_button.click(
|
362 |
fn=lambda: gr.update(interactive=False),
|
363 |
inputs=[],
|
364 |
-
outputs=[synthesize_speech_button]
|
365 |
).then(
|
366 |
fn=reset_ui,
|
367 |
inputs=[],
|
368 |
outputs=[
|
369 |
option_a_audio_player,
|
370 |
option_b_audio_player,
|
371 |
-
vote_button_a,
|
372 |
-
vote_button_b,
|
373 |
-
option_map_state,
|
374 |
-
option_b_audio_state,
|
375 |
vote_submitted_state,
|
376 |
],
|
377 |
).then(
|
378 |
fn=text_to_speech,
|
379 |
inputs=[prompt_input, text_input, generated_text_state],
|
380 |
outputs=[
|
381 |
-
option_a_audio_player,
|
382 |
-
option_b_audio_player,
|
383 |
-
option_map_state,
|
384 |
option_b_audio_state,
|
385 |
],
|
386 |
).then(
|
387 |
fn=lambda: (
|
388 |
-
gr.update(interactive=True),
|
389 |
-
gr.update(interactive=True),
|
390 |
-
gr.update(interactive=True)
|
391 |
),
|
392 |
inputs=[],
|
393 |
-
outputs=[synthesize_speech_button, vote_button_a, vote_button_b]
|
394 |
)
|
395 |
|
396 |
# Vote button click event handlers
|
397 |
vote_button_a.click(
|
398 |
fn=vote,
|
399 |
inputs=[vote_submitted_state, option_map_state, vote_button_a],
|
400 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
401 |
)
|
402 |
vote_button_b.click(
|
403 |
fn=vote,
|
404 |
inputs=[vote_submitted_state, option_map_state, vote_button_b],
|
405 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
406 |
)
|
407 |
|
408 |
# Auto-play second audio after first finishes (Workaround to play audio back-to-back)
|
@@ -421,16 +470,20 @@ def build_gradio_interface() -> gr.Blocks:
|
|
421 |
|
422 |
# Enable voting after second audio option playback finishes
|
423 |
option_b_audio_player.stop(
|
424 |
-
fn=lambda _: (
|
|
|
|
|
|
|
|
|
425 |
inputs=[],
|
426 |
outputs=[vote_button_a, vote_button_b, option_b_audio_player],
|
427 |
)
|
428 |
|
429 |
-
logger.debug(
|
430 |
return demo
|
431 |
|
432 |
|
433 |
-
if __name__ ==
|
434 |
-
logger.info(
|
435 |
demo = build_gradio_interface()
|
436 |
-
demo.launch()
|
|
|
21 |
from src.constants import (
|
22 |
ELEVENLABS,
|
23 |
HUME_AI,
|
24 |
+
OPTION_A,
|
25 |
OPTION_B,
|
26 |
+
PROMPT_MAX_LENGTH,
|
27 |
+
PROMPT_MIN_LENGTH,
|
28 |
SAMPLE_PROMPTS,
|
29 |
TROPHY_EMOJI,
|
30 |
UNKNOWN_PROVIDER,
|
|
|
45 |
from src.utils import truncate_text, validate_prompt_length
|
46 |
|
47 |
|
48 |
+
def generate_text(
|
49 |
+
prompt: str,
|
50 |
+
) -> Tuple[Union[str, gr.update], gr.update]:
|
51 |
"""
|
52 |
Validates the prompt and generates text using Anthropic API.
|
53 |
|
|
|
58 |
Tuple containing:
|
59 |
- The generated text (as a gr.update).
|
60 |
- An update for the generated text state.
|
61 |
+
|
62 |
Raises:
|
63 |
gr.Error: On validation or API errors.
|
64 |
"""
|
65 |
try:
|
66 |
validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
|
67 |
except ValueError as ve:
|
68 |
+
logger.warning(f"Validation error: {ve}")
|
69 |
raise gr.Error(str(ve))
|
70 |
|
71 |
try:
|
72 |
generated_text = generate_text_with_claude(prompt)
|
73 |
+
logger.info(f"Generated text ({len(generated_text)} characters).")
|
74 |
return gr.update(value=generated_text), generated_text
|
75 |
except AnthropicError as ae:
|
76 |
+
logger.error(f"AnthropicError while generating text: {str(ae)}")
|
77 |
+
raise gr.Error(
|
78 |
+
"There was an issue communicating with the Anthropic API. Please try again later."
|
79 |
+
)
|
80 |
except Exception as e:
|
81 |
+
logger.error(f"Unexpected error while generating text: {e}")
|
82 |
+
raise gr.Error("Failed to generate text. Please try again.")
|
83 |
|
84 |
|
85 |
+
def text_to_speech(
|
86 |
+
prompt: str, text: str, generated_text_state: str
|
87 |
+
) -> Tuple[gr.update, gr.update, dict, Union[str, None]]:
|
88 |
"""
|
89 |
Synthesizes two text to speech outputs, loads the two audio players with the
|
90 |
output audio, and updates related UI state components.
|
|
|
101 |
- Update for second audio player
|
102 |
- A dictionary mapping options to providers
|
103 |
- The raw audio value for option B
|
104 |
+
|
105 |
Raises:
|
106 |
gr.Error: On API or unexpected errors.
|
107 |
"""
|
108 |
if not text:
|
109 |
+
logger.warning("Skipping text-to-speech due to empty text.")
|
110 |
+
raise gr.Error("Please generate or enter text to synthesize.")
|
111 |
|
112 |
# If not using generated text, then only compare Hume to Hume
|
113 |
+
compare_hume_with_elevenlabs = (text == generated_text_state) and (
|
114 |
+
random.random() < 0.5
|
115 |
+
)
|
116 |
+
|
117 |
# Pre-select two Hume voices pre-emptively in case we compare Hume to Hume to ensure we do not select the same voice twice.
|
118 |
hume_voice_a, hume_voice_b = get_random_hume_voice_names()
|
119 |
|
120 |
try:
|
121 |
with ThreadPoolExecutor(max_workers=2) as executor:
|
122 |
provider_a = HUME_AI
|
123 |
+
future_audio_a = executor.submit(
|
124 |
+
text_to_speech_with_hume, prompt, text, hume_voice_a
|
125 |
+
)
|
126 |
|
127 |
if compare_hume_with_elevenlabs:
|
128 |
provider_b = ELEVENLABS
|
129 |
future_audio_b = executor.submit(text_to_speech_with_elevenlabs, text)
|
130 |
else:
|
131 |
provider_b = HUME_AI
|
132 |
+
future_audio_b = executor.submit(
|
133 |
+
text_to_speech_with_hume, prompt, text, hume_voice_b
|
134 |
+
)
|
135 |
+
|
136 |
voice_a, audio_a = future_audio_a.result()
|
137 |
voice_b, audio_b = future_audio_b.result()
|
138 |
|
139 |
+
logger.info(
|
140 |
+
f"TTS generated: {provider_a}={len(audio_a)} bytes, {provider_b}={len(audio_b)} bytes"
|
141 |
+
)
|
142 |
options = [
|
143 |
(audio_a, {"provider": provider_a, "voice": voice_a}),
|
144 |
+
(audio_b, {"provider": provider_b, "voice": voice_b}),
|
145 |
]
|
146 |
random.shuffle(options)
|
147 |
option_a_audio, option_b_audio = options[0][0], options[1][0]
|
148 |
+
options_map: OptionMap = {OPTION_A: options[0][1], OPTION_B: options[1][1]}
|
|
|
|
|
|
|
149 |
|
150 |
return (
|
151 |
gr.update(value=option_a_audio, visible=True, autoplay=True),
|
|
|
154 |
option_b_audio,
|
155 |
)
|
156 |
except ElevenLabsError as ee:
|
157 |
+
logger.error(f"ElevenLabsError while synthesizing speech from text: {str(ee)}")
|
158 |
+
raise gr.Error(
|
159 |
+
"There was an issue communicating with the Elevenlabs API. Please try again later."
|
160 |
+
)
|
161 |
except HumeError as he:
|
162 |
+
logger.error(f"HumeError while synthesizing speech from text: {str(he)}")
|
163 |
+
raise gr.Error(
|
164 |
+
"There was an issue communicating with the Hume API. Please try again later."
|
165 |
+
)
|
166 |
except Exception as e:
|
167 |
+
logger.error(f"Unexpected error during TTS generation: {e}")
|
168 |
+
raise gr.Error("An unexpected error ocurred. Please try again later.")
|
169 |
|
170 |
|
171 |
+
def vote(
|
172 |
+
vote_submitted: bool, option_map: OptionMap, selected_button: str
|
173 |
+
) -> Tuple[bool, gr.update, gr.update, gr.update]:
|
174 |
"""
|
175 |
Handles user voting.
|
176 |
|
|
|
195 |
return gr.skip(), gr.skip(), gr.skip(), gr.skip()
|
196 |
|
197 |
option_a_selected = selected_button == VOTE_FOR_OPTION_A
|
198 |
+
selected_option, other_option = (
|
199 |
+
(OPTION_A, OPTION_B) if option_a_selected else (OPTION_B, OPTION_A)
|
200 |
+
)
|
201 |
|
202 |
# Parse selected option details from options map
|
203 |
selected_details = option_map.get(selected_option, {})
|
204 |
+
selected_provider = selected_details.get("provider", UNKNOWN_PROVIDER)
|
205 |
+
selected_voice = selected_details.get("voice", "")
|
206 |
|
207 |
# Parse other option details from options map
|
208 |
other_details = option_map.get(other_option, {})
|
209 |
+
other_provider = other_details.get("provider", UNKNOWN_PROVIDER)
|
210 |
+
other_voice = other_details.get("voice", "")
|
211 |
|
212 |
# Build button labels, displaying the provider and voice name, appending the trophy emoji to the selected option.
|
213 |
selected_label = f"{selected_provider} | Voice: {selected_voice} {TROPHY_EMOJI}"
|
|
|
215 |
|
216 |
return (
|
217 |
True,
|
218 |
+
(
|
219 |
+
gr.update(value=selected_label, variant="primary", interactive=False)
|
220 |
+
if option_a_selected
|
221 |
+
else gr.update(value=other_label, variant="secondary", interactive=False)
|
222 |
+
),
|
223 |
+
(
|
224 |
+
gr.update(value=other_label, variant="secondary", interactive=False)
|
225 |
+
if option_a_selected
|
226 |
+
else gr.update(value=selected_label, variant="primary", interactive=False)
|
227 |
+
),
|
228 |
+
gr.update(interactive=True),
|
229 |
)
|
230 |
|
231 |
|
|
|
246 |
return (
|
247 |
gr.update(value=None),
|
248 |
gr.update(value=None),
|
249 |
+
gr.update(value=VOTE_FOR_OPTION_A, variant="secondary"),
|
250 |
+
gr.update(value=VOTE_FOR_OPTION_B, variant="secondary"),
|
251 |
None,
|
252 |
None,
|
253 |
False,
|
|
|
255 |
|
256 |
|
257 |
def build_input_section() -> Tuple[gr.Markdown, gr.Dropdown, gr.Textbox, gr.Button]:
|
258 |
+
"""Builds the input section including instructions, sample prompt dropdown, prompt input, and generate button"""
|
259 |
+
instructions = gr.Markdown(
|
260 |
+
"""
|
261 |
1. **Enter or Generate Text:** Type directly in the Text box, or optionally enter a Prompt, click "Generate text", and edit if needed.
|
262 |
2. **Synthesize Speech:** Click "Synthesize speech" to generate two audio outputs.
|
263 |
3. **Listen & Compare:** Playback both options (A & B) to hear the differences.
|
264 |
4. **Vote for Your Favorite:** Click "Vote for option A" or "Vote for option B" to choose the best one.
|
265 |
+
"""
|
266 |
+
)
|
267 |
sample_prompt_dropdown = gr.Dropdown(
|
268 |
choices=list(SAMPLE_PROMPTS.keys()),
|
269 |
+
label="Choose a sample prompt (or enter your own)",
|
270 |
value=None,
|
271 |
interactive=True,
|
272 |
)
|
273 |
prompt_input = gr.Textbox(
|
274 |
+
label="Prompt",
|
275 |
+
placeholder="Enter your prompt...",
|
276 |
lines=2,
|
277 |
max_lines=2,
|
278 |
max_length=PROMPT_MAX_LENGTH,
|
279 |
show_copy_button=True,
|
280 |
)
|
281 |
+
generate_text_button = gr.Button("Generate text", variant="secondary")
|
282 |
return (
|
283 |
+
instructions,
|
284 |
+
sample_prompt_dropdown,
|
285 |
+
prompt_input,
|
286 |
generate_text_button,
|
287 |
)
|
288 |
|
289 |
|
290 |
+
def build_output_section() -> (
|
291 |
+
Tuple[gr.Textbox, gr.Button, gr.Audio, gr.Audio, gr.Button, gr.Button]
|
292 |
+
):
|
293 |
+
"""Builds the output section including generated text, audio players, and vote buttons."""
|
294 |
text_input = gr.Textbox(
|
295 |
+
label="Text",
|
296 |
+
placeholder="Enter text to synthesize speech...",
|
297 |
interactive=True,
|
298 |
autoscroll=False,
|
299 |
lines=4,
|
|
|
301 |
max_length=PROMPT_MAX_LENGTH,
|
302 |
show_copy_button=True,
|
303 |
)
|
304 |
+
synthesize_speech_button = gr.Button("Synthesize speech", variant="primary")
|
305 |
with gr.Row(equal_height=True):
|
306 |
+
option_a_audio_player = gr.Audio(
|
307 |
+
label=OPTION_A, type="filepath", interactive=False
|
308 |
+
)
|
309 |
+
option_b_audio_player = gr.Audio(
|
310 |
+
label=OPTION_B, type="filepath", interactive=False
|
311 |
+
)
|
312 |
with gr.Row(equal_height=True):
|
313 |
vote_button_a = gr.Button(VOTE_FOR_OPTION_A, interactive=False)
|
314 |
vote_button_b = gr.Button(VOTE_FOR_OPTION_B, interactive=False)
|
315 |
return (
|
316 |
+
text_input,
|
317 |
+
synthesize_speech_button,
|
318 |
+
option_a_audio_player,
|
319 |
+
option_b_audio_player,
|
320 |
+
vote_button_a,
|
321 |
vote_button_b,
|
322 |
)
|
323 |
|
|
|
331 |
"""
|
332 |
custom_theme = CustomTheme()
|
333 |
with gr.Blocks(
|
334 |
+
title="Expressive TTS Arena",
|
335 |
+
theme=custom_theme,
|
336 |
+
fill_width=True,
|
337 |
+
css_paths="src/assets/styles.css",
|
338 |
) as demo:
|
339 |
# Title
|
340 |
+
gr.Markdown("# Expressive TTS Arena")
|
341 |
|
342 |
# Build generate text section
|
343 |
+
(instructions, sample_prompt_dropdown, prompt_input, generate_text_button) = (
|
344 |
+
build_input_section()
|
345 |
+
)
|
|
|
346 |
|
347 |
# Build synthesize speech section
|
348 |
+
(
|
349 |
+
text_input,
|
350 |
+
synthesize_speech_button,
|
351 |
+
option_a_audio_player,
|
352 |
+
option_b_audio_player,
|
353 |
+
vote_button_a,
|
354 |
+
vote_button_b,
|
355 |
+
) = build_output_section()
|
356 |
+
|
357 |
+
# --- UI state components ---
|
358 |
+
|
359 |
+
# Track generated text state
|
360 |
+
generated_text_state = gr.State("")
|
361 |
+
# Track generated audio for option B so it can be played automatically after option A audio finishes
|
362 |
+
option_b_audio_state = gr.State()
|
363 |
+
# Track option map (option A and option B are randomized)
|
364 |
+
option_map_state = gr.State()
|
365 |
+
# Track whether the user has voted for an option
|
366 |
+
vote_submitted_state = gr.State(False)
|
367 |
|
368 |
# --- Register event handlers ---
|
369 |
|
370 |
# When a sample prompt is chosen, update the prompt textbox
|
371 |
sample_prompt_dropdown.change(
|
372 |
+
fn=lambda choice: SAMPLE_PROMPTS.get(choice, ""),
|
373 |
inputs=[sample_prompt_dropdown],
|
374 |
outputs=[prompt_input],
|
375 |
)
|
|
|
381 |
generate_text_button.click(
|
382 |
fn=lambda: gr.update(interactive=False),
|
383 |
inputs=[],
|
384 |
+
outputs=[generate_text_button],
|
385 |
).then(
|
386 |
fn=generate_text,
|
387 |
inputs=[prompt_input],
|
|
|
389 |
).then(
|
390 |
fn=lambda: gr.update(interactive=True),
|
391 |
inputs=[],
|
392 |
+
outputs=[generate_text_button],
|
393 |
)
|
394 |
+
|
395 |
# Synthesize speech button click event handler chain:
|
396 |
# 1. Disable the "Synthesize speech" button
|
397 |
# 2. Reset UI state
|
|
|
400 |
synthesize_speech_button.click(
|
401 |
fn=lambda: gr.update(interactive=False),
|
402 |
inputs=[],
|
403 |
+
outputs=[synthesize_speech_button],
|
404 |
).then(
|
405 |
fn=reset_ui,
|
406 |
inputs=[],
|
407 |
outputs=[
|
408 |
option_a_audio_player,
|
409 |
option_b_audio_player,
|
410 |
+
vote_button_a,
|
411 |
+
vote_button_b,
|
412 |
+
option_map_state,
|
413 |
+
option_b_audio_state,
|
414 |
vote_submitted_state,
|
415 |
],
|
416 |
).then(
|
417 |
fn=text_to_speech,
|
418 |
inputs=[prompt_input, text_input, generated_text_state],
|
419 |
outputs=[
|
420 |
+
option_a_audio_player,
|
421 |
+
option_b_audio_player,
|
422 |
+
option_map_state,
|
423 |
option_b_audio_state,
|
424 |
],
|
425 |
).then(
|
426 |
fn=lambda: (
|
427 |
+
gr.update(interactive=True),
|
428 |
+
gr.update(interactive=True),
|
429 |
+
gr.update(interactive=True),
|
430 |
),
|
431 |
inputs=[],
|
432 |
+
outputs=[synthesize_speech_button, vote_button_a, vote_button_b],
|
433 |
)
|
434 |
|
435 |
# Vote button click event handlers
|
436 |
vote_button_a.click(
|
437 |
fn=vote,
|
438 |
inputs=[vote_submitted_state, option_map_state, vote_button_a],
|
439 |
+
outputs=[
|
440 |
+
vote_submitted_state,
|
441 |
+
vote_button_a,
|
442 |
+
vote_button_b,
|
443 |
+
synthesize_speech_button,
|
444 |
+
],
|
445 |
)
|
446 |
vote_button_b.click(
|
447 |
fn=vote,
|
448 |
inputs=[vote_submitted_state, option_map_state, vote_button_b],
|
449 |
+
outputs=[
|
450 |
+
vote_submitted_state,
|
451 |
+
vote_button_a,
|
452 |
+
vote_button_b,
|
453 |
+
synthesize_speech_button,
|
454 |
+
],
|
455 |
)
|
456 |
|
457 |
# Auto-play second audio after first finishes (Workaround to play audio back-to-back)
|
|
|
470 |
|
471 |
# Enable voting after second audio option playback finishes
|
472 |
option_b_audio_player.stop(
|
473 |
+
fn=lambda _: (
|
474 |
+
gr.update(interactive=True),
|
475 |
+
gr.update(interactive=True),
|
476 |
+
gr.update(autoplay=False),
|
477 |
+
),
|
478 |
inputs=[],
|
479 |
outputs=[vote_button_a, vote_button_b, option_b_audio_player],
|
480 |
)
|
481 |
|
482 |
+
logger.debug("Gradio interface built successfully")
|
483 |
return demo
|
484 |
|
485 |
|
486 |
+
if __name__ == "__main__":
|
487 |
+
logger.info("Launching TTS Arena Gradio app...")
|
488 |
demo = build_gradio_interface()
|
489 |
+
demo.launch()
|
src/config.py
CHANGED
@@ -22,10 +22,10 @@ load_dotenv()
|
|
22 |
|
23 |
|
24 |
# Enable debugging mode based on an environment variable
|
25 |
-
debug_raw = os.getenv(
|
26 |
-
if debug_raw not in {
|
27 |
print(f'Warning: Invalid DEBUG value "{debug_raw}". Defaulting to "false".')
|
28 |
-
DEBUG = debug_raw ==
|
29 |
|
30 |
|
31 |
# Configure the logger
|
@@ -33,8 +33,8 @@ logging.basicConfig(
|
|
33 |
level=logging.DEBUG if DEBUG else logging.INFO,
|
34 |
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
35 |
)
|
36 |
-
logger: logging.Logger = logging.getLogger(
|
37 |
logger.info(f'Debug mode is {"enabled" if DEBUG else "disabled"}.')
|
38 |
|
39 |
if DEBUG:
|
40 |
-
logger.debug(f
|
|
|
22 |
|
23 |
|
24 |
# Enable debugging mode based on an environment variable
|
25 |
+
debug_raw = os.getenv("DEBUG", "false").lower()
|
26 |
+
if debug_raw not in {"true", "false"}:
|
27 |
print(f'Warning: Invalid DEBUG value "{debug_raw}". Defaulting to "false".')
|
28 |
+
DEBUG = debug_raw == "true"
|
29 |
|
30 |
|
31 |
# Configure the logger
|
|
|
33 |
level=logging.DEBUG if DEBUG else logging.INFO,
|
34 |
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
35 |
)
|
36 |
+
logger: logging.Logger = logging.getLogger("tts_arena")
|
37 |
logger.info(f'Debug mode is {"enabled" if DEBUG else "disabled"}.')
|
38 |
|
39 |
if DEBUG:
|
40 |
+
logger.debug(f"DEBUG mode enabled.")
|
src/constants.py
CHANGED
@@ -7,47 +7,38 @@ This module defines global constants used throughout the project.
|
|
7 |
from src.types import OptionKey, TTSProviderName
|
8 |
|
9 |
# UI constants
|
10 |
-
HUME_AI: TTSProviderName =
|
11 |
-
ELEVENLABS: TTSProviderName =
|
12 |
-
UNKNOWN_PROVIDER: TTSProviderName =
|
13 |
|
14 |
PROMPT_MIN_LENGTH: int = 10
|
15 |
PROMPT_MAX_LENGTH: int = 400
|
16 |
|
17 |
-
OPTION_A: OptionKey =
|
18 |
-
OPTION_B: OptionKey =
|
19 |
-
TROPHY_EMOJI: str =
|
20 |
-
VOTE_FOR_OPTION_A: str =
|
21 |
-
VOTE_FOR_OPTION_B: str =
|
22 |
|
23 |
|
24 |
-
# A collection of pre-defined prompts categorized by theme, used to provide users with
|
25 |
# inspiration for generating creative text for expressive TTS.
|
26 |
SAMPLE_PROMPTS: dict = {
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
"
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
'tyrant. The speech should be urgent, filled with conviction, and call for freedom and justice, '
|
46 |
-
'making sure the emotional intensity is evident in the phrasing.',
|
47 |
-
|
48 |
-
'👻 Mysterious Horror Scene (Haunted Lighthouse)':
|
49 |
-
'Describe a chilling ghostly encounter in an abandoned lighthouse on a foggy night. The '
|
50 |
-
'protagonist, alone and cold, hears whispers from the shadows, telling them secrets they were '
|
51 |
-
'never meant to know. Use language that builds suspense and tension, ensuring it sounds '
|
52 |
-
'haunting and engaging.'
|
53 |
-
}
|
|
|
7 |
from src.types import OptionKey, TTSProviderName
|
8 |
|
9 |
# UI constants
|
10 |
+
HUME_AI: TTSProviderName = "Hume AI"
|
11 |
+
ELEVENLABS: TTSProviderName = "ElevenLabs"
|
12 |
+
UNKNOWN_PROVIDER: TTSProviderName = "Unknown"
|
13 |
|
14 |
PROMPT_MIN_LENGTH: int = 10
|
15 |
PROMPT_MAX_LENGTH: int = 400
|
16 |
|
17 |
+
OPTION_A: OptionKey = "Option A"
|
18 |
+
OPTION_B: OptionKey = "Option B"
|
19 |
+
TROPHY_EMOJI: str = "🏆"
|
20 |
+
VOTE_FOR_OPTION_A: str = "Vote for option A"
|
21 |
+
VOTE_FOR_OPTION_B: str = "Vote for option B"
|
22 |
|
23 |
|
24 |
+
# A collection of pre-defined prompts categorized by theme, used to provide users with
|
25 |
# inspiration for generating creative text for expressive TTS.
|
26 |
SAMPLE_PROMPTS: dict = {
|
27 |
+
"🚀 Dramatic Monologue (Stranded Astronaut)": "Write a short dramatic monologue from a lone astronaut stranded on Mars, speaking to "
|
28 |
+
"mission control for the last time. The tone should be reflective and filled with awe, conveying "
|
29 |
+
"resignation and finality. Describe the Martian landscape and their thoughts in a way that "
|
30 |
+
"would evoke emotion and depth.",
|
31 |
+
"📜 Poetic Sonnet (The Passage of Time)": "Compose a concise sonnet about the passage of time, using vivid imagery and a flowing, "
|
32 |
+
"melodic rhythm. The poem should evoke the contrast between fleeting moments and eternity, "
|
33 |
+
"capturing both beauty and melancholy, with natural pacing for speech delivery.",
|
34 |
+
"🐱 Whimsical Children's Story (Talking Cat)": "Tell a short, whimsical bedtime story about a mischievous talking cat who sneaks into a grand "
|
35 |
+
"wizard’s library at night and accidentally casts a spell that brings the books to life. Keep the "
|
36 |
+
"tone playful and filled with wonder, ensuring the language flows smoothly.",
|
37 |
+
"🔥 Intense Speech (Freedom & Justice)": "Write a powerful, impassioned speech from a rebel leader rallying their people against a "
|
38 |
+
"tyrant. The speech should be urgent, filled with conviction, and call for freedom and justice, "
|
39 |
+
"making sure the emotional intensity is evident in the phrasing.",
|
40 |
+
"👻 Mysterious Horror Scene (Haunted Lighthouse)": "Describe a chilling ghostly encounter in an abandoned lighthouse on a foggy night. The "
|
41 |
+
"protagonist, alone and cold, hears whispers from the shadows, telling them secrets they were "
|
42 |
+
"never meant to know. Use language that builds suspense and tension, ensuring it sounds "
|
43 |
+
"haunting and engaging.",
|
44 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/integrations/__init__.py
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
from .anthropic_api import generate_text_with_claude, AnthropicError
|
2 |
from .elevenlabs_api import text_to_speech_with_elevenlabs, ElevenLabsError
|
3 |
-
from .hume_api import text_to_speech_with_hume, get_random_hume_voice_names, HumeError
|
|
|
1 |
from .anthropic_api import generate_text_with_claude, AnthropicError
|
2 |
from .elevenlabs_api import text_to_speech_with_elevenlabs, ElevenLabsError
|
3 |
+
from .hume_api import text_to_speech_with_hume, get_random_hume_voice_names, HumeError
|
src/integrations/anthropic_api.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
"""
|
2 |
anthropic_api.py
|
3 |
|
4 |
-
This file defines the interaction with the Anthropic API, focusing on generating text using the Claude model.
|
5 |
It includes functionality for input validation, API request handling, and processing API responses.
|
6 |
|
7 |
Key Features:
|
@@ -36,8 +36,9 @@ from src.utils import truncate_text, validate_env_var
|
|
36 |
@dataclass(frozen=True)
|
37 |
class AnthropicConfig:
|
38 |
"""Immutable configuration for interacting with the Anthropic API."""
|
39 |
-
|
40 |
-
|
|
|
41 |
max_tokens: int = 256
|
42 |
system_prompt: str = f"""You are an imaginative and articulate assistant, skilled in generating creative, concise, and engaging content that is perfectly suited for expressive speech synthesis.
|
43 |
|
@@ -60,13 +61,13 @@ The generated text will be directly fed into TTS APIs, so avoid ambiguity, and a
|
|
60 |
def __post_init__(self):
|
61 |
# Validate that required attributes are set
|
62 |
if not self.api_key:
|
63 |
-
raise ValueError(
|
64 |
if not self.model:
|
65 |
-
raise ValueError(
|
66 |
if not self.max_tokens:
|
67 |
-
raise ValueError(
|
68 |
if not self.system_prompt:
|
69 |
-
raise ValueError(
|
70 |
|
71 |
@property
|
72 |
def client(self) -> Anthropic:
|
@@ -81,6 +82,7 @@ The generated text will be directly fed into TTS APIs, so avoid ambiguity, and a
|
|
81 |
|
82 |
class AnthropicError(Exception):
|
83 |
"""Custom exception for errors related to the Anthropic API."""
|
|
|
84 |
def __init__(self, message: str, original_exception: Optional[Exception] = None):
|
85 |
super().__init__(message)
|
86 |
self.original_exception = original_exception
|
@@ -95,7 +97,7 @@ anthropic_config = AnthropicConfig()
|
|
95 |
wait=wait_fixed(2),
|
96 |
before=before_log(logger, logging.DEBUG),
|
97 |
after=after_log(logger, logging.DEBUG),
|
98 |
-
reraise=True
|
99 |
)
|
100 |
def generate_text_with_claude(prompt: str) -> str:
|
101 |
"""
|
@@ -110,7 +112,9 @@ def generate_text_with_claude(prompt: str) -> str:
|
|
110 |
Raises:
|
111 |
AnthropicError: If there is an error communicating with the Anthropic API.
|
112 |
"""
|
113 |
-
logger.debug(
|
|
|
|
|
114 |
|
115 |
response = None
|
116 |
try:
|
@@ -119,36 +123,40 @@ def generate_text_with_claude(prompt: str) -> str:
|
|
119 |
model=anthropic_config.model,
|
120 |
max_tokens=anthropic_config.max_tokens,
|
121 |
system=anthropic_config.system_prompt,
|
122 |
-
messages=[{
|
123 |
)
|
124 |
-
logger.debug(f
|
125 |
|
126 |
# Validate response
|
127 |
-
if not hasattr(response,
|
128 |
logger.error("Response is missing 'content'. Response: %s", response)
|
129 |
raise AnthropicError('Invalid API response: Missing "content".')
|
130 |
|
131 |
# Process response
|
132 |
blocks: Union[List[TextBlock], TextBlock, None] = response.content
|
133 |
if isinstance(blocks, list):
|
134 |
-
result =
|
135 |
-
|
|
|
|
|
136 |
return result
|
137 |
if isinstance(blocks, TextBlock):
|
138 |
-
logger.debug(
|
|
|
|
|
139 |
return blocks.text
|
140 |
|
141 |
-
logger.warning(f
|
142 |
-
return str(blocks or
|
143 |
|
144 |
except Exception as e:
|
145 |
-
logger.exception(f
|
146 |
raise AnthropicError(
|
147 |
message=(
|
148 |
-
f
|
149 |
f'HTTP Status: {getattr(response, "status", "N/A")}. '
|
150 |
-
f
|
151 |
-
f
|
152 |
),
|
153 |
original_exception=e,
|
154 |
-
)
|
|
|
1 |
"""
|
2 |
anthropic_api.py
|
3 |
|
4 |
+
This file defines the interaction with the Anthropic API, focusing on generating text using the Claude model.
|
5 |
It includes functionality for input validation, API request handling, and processing API responses.
|
6 |
|
7 |
Key Features:
|
|
|
36 |
@dataclass(frozen=True)
|
37 |
class AnthropicConfig:
|
38 |
"""Immutable configuration for interacting with the Anthropic API."""
|
39 |
+
|
40 |
+
api_key: str = validate_env_var("ANTHROPIC_API_KEY")
|
41 |
+
model: ModelParam = "claude-3-5-sonnet-latest"
|
42 |
max_tokens: int = 256
|
43 |
system_prompt: str = f"""You are an imaginative and articulate assistant, skilled in generating creative, concise, and engaging content that is perfectly suited for expressive speech synthesis.
|
44 |
|
|
|
61 |
def __post_init__(self):
|
62 |
# Validate that required attributes are set
|
63 |
if not self.api_key:
|
64 |
+
raise ValueError("Anthropic API key is not set.")
|
65 |
if not self.model:
|
66 |
+
raise ValueError("Anthropic Model is not set.")
|
67 |
if not self.max_tokens:
|
68 |
+
raise ValueError("Anthropic Max Tokens is not set.")
|
69 |
if not self.system_prompt:
|
70 |
+
raise ValueError("Anthropic System Prompt is not set.")
|
71 |
|
72 |
@property
|
73 |
def client(self) -> Anthropic:
|
|
|
82 |
|
83 |
class AnthropicError(Exception):
|
84 |
"""Custom exception for errors related to the Anthropic API."""
|
85 |
+
|
86 |
def __init__(self, message: str, original_exception: Optional[Exception] = None):
|
87 |
super().__init__(message)
|
88 |
self.original_exception = original_exception
|
|
|
97 |
wait=wait_fixed(2),
|
98 |
before=before_log(logger, logging.DEBUG),
|
99 |
after=after_log(logger, logging.DEBUG),
|
100 |
+
reraise=True,
|
101 |
)
|
102 |
def generate_text_with_claude(prompt: str) -> str:
|
103 |
"""
|
|
|
112 |
Raises:
|
113 |
AnthropicError: If there is an error communicating with the Anthropic API.
|
114 |
"""
|
115 |
+
logger.debug(
|
116 |
+
f"Generating text with Claude. Prompt length: {len(prompt)} characters."
|
117 |
+
)
|
118 |
|
119 |
response = None
|
120 |
try:
|
|
|
123 |
model=anthropic_config.model,
|
124 |
max_tokens=anthropic_config.max_tokens,
|
125 |
system=anthropic_config.system_prompt,
|
126 |
+
messages=[{"role": "user", "content": prompt}],
|
127 |
)
|
128 |
+
logger.debug(f"API response received: {truncate_text(str(response))}")
|
129 |
|
130 |
# Validate response
|
131 |
+
if not hasattr(response, "content"):
|
132 |
logger.error("Response is missing 'content'. Response: %s", response)
|
133 |
raise AnthropicError('Invalid API response: Missing "content".')
|
134 |
|
135 |
# Process response
|
136 |
blocks: Union[List[TextBlock], TextBlock, None] = response.content
|
137 |
if isinstance(blocks, list):
|
138 |
+
result = "\n\n".join(
|
139 |
+
block.text for block in blocks if isinstance(block, TextBlock)
|
140 |
+
)
|
141 |
+
logger.debug(f"Processed response from list: {truncate_text(result)}")
|
142 |
return result
|
143 |
if isinstance(blocks, TextBlock):
|
144 |
+
logger.debug(
|
145 |
+
f"Processed response from single TextBlock: {truncate_text(blocks.text)}"
|
146 |
+
)
|
147 |
return blocks.text
|
148 |
|
149 |
+
logger.warning(f"Unexpected response type: {type(blocks)}")
|
150 |
+
return str(blocks or "No content generated.")
|
151 |
|
152 |
except Exception as e:
|
153 |
+
logger.exception(f"Error generating text with Anthropic: {e}")
|
154 |
raise AnthropicError(
|
155 |
message=(
|
156 |
+
f"Error generating text with Anthropic: {e}. "
|
157 |
f'HTTP Status: {getattr(response, "status", "N/A")}. '
|
158 |
+
f"Prompt (truncated): {truncate_text(prompt)}. "
|
159 |
+
f"Model: {anthropic_config.model}, Max tokens: {anthropic_config.max_tokens}"
|
160 |
),
|
161 |
original_exception=e,
|
162 |
+
)
|
src/integrations/elevenlabs_api.py
CHANGED
@@ -35,13 +35,14 @@ from src.config import logger
|
|
35 |
from src.utils import validate_env_var
|
36 |
|
37 |
|
38 |
-
ElevenlabsVoiceName = Literal[
|
|
|
39 |
|
40 |
class ElevenLabsVoice(Enum):
|
41 |
-
ADAM = (
|
42 |
-
ANTONI = (
|
43 |
-
RACHEL = (
|
44 |
-
MATILDA = (
|
45 |
|
46 |
@property
|
47 |
def voice_name(self) -> ElevenlabsVoiceName:
|
@@ -57,19 +58,22 @@ class ElevenLabsVoice(Enum):
|
|
57 |
@dataclass(frozen=True)
|
58 |
class ElevenLabsConfig:
|
59 |
"""Immutable configuration for interacting with the ElevenLabs TTS API."""
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
63 |
|
64 |
def __post_init__(self):
|
65 |
# Validate that required attributes are set
|
66 |
if not self.api_key:
|
67 |
-
raise ValueError(
|
68 |
if not self.model_id:
|
69 |
-
raise ValueError(
|
70 |
if not self.output_format:
|
71 |
-
raise ValueError(
|
72 |
-
|
73 |
@property
|
74 |
def client(self) -> ElevenLabs:
|
75 |
"""
|
@@ -93,6 +97,7 @@ class ElevenLabsConfig:
|
|
93 |
|
94 |
class ElevenLabsError(Exception):
|
95 |
"""Custom exception for errors related to the ElevenLabs TTS API."""
|
|
|
96 |
def __init__(self, message: str, original_exception: Optional[Exception] = None):
|
97 |
super().__init__(message)
|
98 |
self.original_exception = original_exception
|
@@ -107,7 +112,7 @@ elevenlabs_config = ElevenLabsConfig()
|
|
107 |
wait=wait_fixed(2),
|
108 |
before=before_log(logger, logging.DEBUG),
|
109 |
after=after_log(logger, logging.DEBUG),
|
110 |
-
reraise=True
|
111 |
)
|
112 |
def text_to_speech_with_elevenlabs(text: str) -> Tuple[ElevenlabsVoiceName, bytes]:
|
113 |
"""
|
@@ -123,7 +128,9 @@ def text_to_speech_with_elevenlabs(text: str) -> Tuple[ElevenlabsVoiceName, byte
|
|
123 |
Raises:
|
124 |
ElevenLabsError: If there is an error communicating with the ElevenLabs API or processing the response.
|
125 |
"""
|
126 |
-
logger.debug(
|
|
|
|
|
127 |
|
128 |
# Get a random voice as an enum member.
|
129 |
voice = elevenlabs_config.random_voice
|
@@ -141,22 +148,24 @@ def text_to_speech_with_elevenlabs(text: str) -> Tuple[ElevenlabsVoiceName, byte
|
|
141 |
# Attempt to combine chunks into a single bytes object.
|
142 |
# If audio_iterator is not iterable or invalid, an exception will be raised.
|
143 |
try:
|
144 |
-
audio = b
|
145 |
except Exception as iter_error:
|
146 |
-
logger.error(
|
147 |
-
raise ElevenLabsError(
|
|
|
|
|
148 |
|
149 |
# Validate audio
|
150 |
if not audio:
|
151 |
-
logger.error(
|
152 |
-
raise ElevenLabsError(
|
153 |
|
154 |
-
logger.info(f
|
155 |
return voice.voice_name, audio
|
156 |
|
157 |
except Exception as e:
|
158 |
-
logger.exception(f
|
159 |
raise ElevenLabsError(
|
160 |
-
message=f
|
161 |
original_exception=e,
|
162 |
-
)
|
|
|
35 |
from src.utils import validate_env_var
|
36 |
|
37 |
|
38 |
+
ElevenlabsVoiceName = Literal["Adam", "Antoni", "Rachel", "Matilda"]
|
39 |
+
|
40 |
|
41 |
class ElevenLabsVoice(Enum):
|
42 |
+
ADAM = ("Adam", "pNInz6obpgDQGcFmaJgB")
|
43 |
+
ANTONI = ("Antoni", "ErXwobaYiN019PkySvjV")
|
44 |
+
RACHEL = ("Rachel", "21m00Tcm4TlvDq8ikWAM")
|
45 |
+
MATILDA = ("Matilda", "XrExE9yKIg1WjnnlVkGX")
|
46 |
|
47 |
@property
|
48 |
def voice_name(self) -> ElevenlabsVoiceName:
|
|
|
58 |
@dataclass(frozen=True)
|
59 |
class ElevenLabsConfig:
|
60 |
"""Immutable configuration for interacting with the ElevenLabs TTS API."""
|
61 |
+
|
62 |
+
api_key: str = validate_env_var("ELEVENLABS_API_KEY")
|
63 |
+
model_id: str = (
|
64 |
+
"eleven_multilingual_v2" # ElevenLab's most emotionally expressive model
|
65 |
+
)
|
66 |
+
output_format: str = "mp3_44100_128" # Output format of the generated audio
|
67 |
|
68 |
def __post_init__(self):
|
69 |
# Validate that required attributes are set
|
70 |
if not self.api_key:
|
71 |
+
raise ValueError("ElevenLabs API key is not set.")
|
72 |
if not self.model_id:
|
73 |
+
raise ValueError("ElevenLabs Model ID is not set.")
|
74 |
if not self.output_format:
|
75 |
+
raise ValueError("ElevenLabs Output Format is not set.")
|
76 |
+
|
77 |
@property
|
78 |
def client(self) -> ElevenLabs:
|
79 |
"""
|
|
|
97 |
|
98 |
class ElevenLabsError(Exception):
|
99 |
"""Custom exception for errors related to the ElevenLabs TTS API."""
|
100 |
+
|
101 |
def __init__(self, message: str, original_exception: Optional[Exception] = None):
|
102 |
super().__init__(message)
|
103 |
self.original_exception = original_exception
|
|
|
112 |
wait=wait_fixed(2),
|
113 |
before=before_log(logger, logging.DEBUG),
|
114 |
after=after_log(logger, logging.DEBUG),
|
115 |
+
reraise=True,
|
116 |
)
|
117 |
def text_to_speech_with_elevenlabs(text: str) -> Tuple[ElevenlabsVoiceName, bytes]:
|
118 |
"""
|
|
|
128 |
Raises:
|
129 |
ElevenLabsError: If there is an error communicating with the ElevenLabs API or processing the response.
|
130 |
"""
|
131 |
+
logger.debug(
|
132 |
+
f"Synthesizing speech from text with ElevenLabs. Text length: {len(text)} characters."
|
133 |
+
)
|
134 |
|
135 |
# Get a random voice as an enum member.
|
136 |
voice = elevenlabs_config.random_voice
|
|
|
148 |
# Attempt to combine chunks into a single bytes object.
|
149 |
# If audio_iterator is not iterable or invalid, an exception will be raised.
|
150 |
try:
|
151 |
+
audio = b"".join(chunk for chunk in audio_iterator)
|
152 |
except Exception as iter_error:
|
153 |
+
logger.error("Invalid audio iterator response.")
|
154 |
+
raise ElevenLabsError(
|
155 |
+
"Invalid audio iterator received from ElevenLabs API."
|
156 |
+
) from iter_error
|
157 |
|
158 |
# Validate audio
|
159 |
if not audio:
|
160 |
+
logger.error("No audio data received from ElevenLabs API.")
|
161 |
+
raise ElevenLabsError("Empty audio data received from ElevenLabs API.")
|
162 |
|
163 |
+
logger.info(f"Received ElevenLabs audio ({len(audio)} bytes).")
|
164 |
return voice.voice_name, audio
|
165 |
|
166 |
except Exception as e:
|
167 |
+
logger.exception(f"Error synthesizing speech from text with Elevenlabs: {e}")
|
168 |
raise ElevenLabsError(
|
169 |
+
message=f"Failed to synthesize speech from text with ElevenLabs: {e}",
|
170 |
original_exception=e,
|
171 |
+
)
|
src/integrations/hume_api.py
CHANGED
@@ -33,37 +33,44 @@ from src.config import logger
|
|
33 |
from src.utils import validate_env_var, truncate_text
|
34 |
|
35 |
|
36 |
-
HumeVoiceName = Literal[
|
|
|
37 |
|
38 |
@dataclass(frozen=True)
|
39 |
class HumeConfig:
|
40 |
"""Immutable configuration for interacting with the Hume TTS API."""
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
45 |
headers: dict = None
|
46 |
|
47 |
def __post_init__(self):
|
48 |
# Validate required attributes
|
49 |
if not self.api_key:
|
50 |
-
raise ValueError(
|
51 |
if not self.tts_endpoint_url:
|
52 |
-
raise ValueError(
|
53 |
if not self.voice_names:
|
54 |
-
raise ValueError(
|
55 |
if not self.audio_format:
|
56 |
-
raise ValueError(
|
57 |
|
58 |
# Set headers dynamically after validation
|
59 |
-
object.__setattr__(
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
63 |
|
64 |
|
65 |
class HumeError(Exception):
|
66 |
"""Custom exception for errors related to the Hume TTS API."""
|
|
|
67 |
def __init__(self, message: str, original_exception: Optional[Exception] = None):
|
68 |
super().__init__(message)
|
69 |
self.original_exception = original_exception
|
@@ -78,9 +85,11 @@ hume_config = HumeConfig()
|
|
78 |
wait=wait_fixed(2),
|
79 |
before=before_log(logger, logging.DEBUG),
|
80 |
after=after_log(logger, logging.DEBUG),
|
81 |
-
reraise=True
|
82 |
)
|
83 |
-
def text_to_speech_with_hume(
|
|
|
|
|
84 |
"""
|
85 |
Synthesizes text to speech using the Hume TTS API and processes raw binary audio data.
|
86 |
|
@@ -96,13 +105,13 @@ def text_to_speech_with_hume(prompt: str, text: str, voice_name: HumeVoiceName)
|
|
96 |
Raises:
|
97 |
HumeError: If there is an error communicating with the Hume TTS API.
|
98 |
"""
|
99 |
-
logger.debug(
|
|
|
|
|
100 |
|
101 |
request_body = {
|
102 |
-
|
103 |
-
|
104 |
-
'name': voice_name
|
105 |
-
},
|
106 |
}
|
107 |
|
108 |
try:
|
@@ -115,26 +124,33 @@ def text_to_speech_with_hume(prompt: str, text: str, voice_name: HumeVoiceName)
|
|
115 |
|
116 |
# Validate response
|
117 |
if response.status_code != 200:
|
118 |
-
logger.error(
|
119 |
-
|
|
|
|
|
|
|
|
|
120 |
|
121 |
# Process response audio
|
122 |
-
if response.headers.get(
|
123 |
audio = response.content # Raw binary audio data
|
124 |
-
logger.info(f
|
125 |
return voice_name, audio
|
126 |
|
127 |
-
raise HumeError(
|
|
|
|
|
128 |
|
129 |
except Exception as e:
|
130 |
-
logger.exception(f
|
131 |
raise HumeError(
|
132 |
-
message=f
|
133 |
original_exception=e,
|
134 |
)
|
135 |
|
|
|
136 |
def get_random_hume_voice_names() -> Tuple[HumeVoiceName, HumeVoiceName]:
|
137 |
-
"""
|
138 |
Get two random Hume voice names.
|
139 |
|
140 |
Voices:
|
@@ -143,4 +159,4 @@ def get_random_hume_voice_names() -> Tuple[HumeVoiceName, HumeVoiceName]:
|
|
143 |
- STELLA
|
144 |
- DACHER
|
145 |
"""
|
146 |
-
return tuple(random.sample(hume_config.voice_names, 2))
|
|
|
33 |
from src.utils import validate_env_var, truncate_text
|
34 |
|
35 |
|
36 |
+
HumeVoiceName = Literal["ITO", "KORA", "STELLA", "DACHER"]
|
37 |
+
|
38 |
|
39 |
@dataclass(frozen=True)
|
40 |
class HumeConfig:
|
41 |
"""Immutable configuration for interacting with the Hume TTS API."""
|
42 |
+
|
43 |
+
api_key: str = validate_env_var("HUME_API_KEY")
|
44 |
+
tts_endpoint_url: str = "https://api.hume.ai/v0/tts"
|
45 |
+
voice_names: List[HumeVoiceName] = ("ITO", "KORA", "STELLA", "DACHER")
|
46 |
+
audio_format: str = "wav"
|
47 |
headers: dict = None
|
48 |
|
49 |
def __post_init__(self):
|
50 |
# Validate required attributes
|
51 |
if not self.api_key:
|
52 |
+
raise ValueError("Hume API key is not set.")
|
53 |
if not self.tts_endpoint_url:
|
54 |
+
raise ValueError("Hume TTS endpoint URL is not set.")
|
55 |
if not self.voice_names:
|
56 |
+
raise ValueError("Hume voice names list is not set.")
|
57 |
if not self.audio_format:
|
58 |
+
raise ValueError("Hume audio format is not set.")
|
59 |
|
60 |
# Set headers dynamically after validation
|
61 |
+
object.__setattr__(
|
62 |
+
self,
|
63 |
+
"headers",
|
64 |
+
{
|
65 |
+
"X-Hume-Api-Key": f"{self.api_key}",
|
66 |
+
"Content-Type": "application/json",
|
67 |
+
},
|
68 |
+
)
|
69 |
|
70 |
|
71 |
class HumeError(Exception):
|
72 |
"""Custom exception for errors related to the Hume TTS API."""
|
73 |
+
|
74 |
def __init__(self, message: str, original_exception: Optional[Exception] = None):
|
75 |
super().__init__(message)
|
76 |
self.original_exception = original_exception
|
|
|
85 |
wait=wait_fixed(2),
|
86 |
before=before_log(logger, logging.DEBUG),
|
87 |
after=after_log(logger, logging.DEBUG),
|
88 |
+
reraise=True,
|
89 |
)
|
90 |
+
def text_to_speech_with_hume(
|
91 |
+
prompt: str, text: str, voice_name: HumeVoiceName
|
92 |
+
) -> bytes:
|
93 |
"""
|
94 |
Synthesizes text to speech using the Hume TTS API and processes raw binary audio data.
|
95 |
|
|
|
105 |
Raises:
|
106 |
HumeError: If there is an error communicating with the Hume TTS API.
|
107 |
"""
|
108 |
+
logger.debug(
|
109 |
+
f"Processing TTS with Hume. Prompt length: {len(prompt)} characters. Text length: {len(text)} characters."
|
110 |
+
)
|
111 |
|
112 |
request_body = {
|
113 |
+
"text": text,
|
114 |
+
"voice": {"name": voice_name},
|
|
|
|
|
115 |
}
|
116 |
|
117 |
try:
|
|
|
124 |
|
125 |
# Validate response
|
126 |
if response.status_code != 200:
|
127 |
+
logger.error(
|
128 |
+
f"Hume TTS API Error: {response.status_code} - {response.text[:200]}... (truncated)"
|
129 |
+
)
|
130 |
+
raise HumeError(
|
131 |
+
f"Hume TTS API responded with status {response.status_code}: {response.text[:200]}"
|
132 |
+
)
|
133 |
|
134 |
# Process response audio
|
135 |
+
if response.headers.get("Content-Type", "").startswith("audio/"):
|
136 |
audio = response.content # Raw binary audio data
|
137 |
+
logger.info(f"Received audio data from Hume ({len(audio)} bytes).")
|
138 |
return voice_name, audio
|
139 |
|
140 |
+
raise HumeError(
|
141 |
+
f'Unexpected Content-Type: {response.headers.get("Content-Type", "Unknown")}'
|
142 |
+
)
|
143 |
|
144 |
except Exception as e:
|
145 |
+
logger.exception(f"Error synthesizing speech from text with Hume: {e}")
|
146 |
raise HumeError(
|
147 |
+
message=f"Failed to synthesize speech from text with Hume: {e}",
|
148 |
original_exception=e,
|
149 |
)
|
150 |
|
151 |
+
|
152 |
def get_random_hume_voice_names() -> Tuple[HumeVoiceName, HumeVoiceName]:
|
153 |
+
"""
|
154 |
Get two random Hume voice names.
|
155 |
|
156 |
Voices:
|
|
|
159 |
- STELLA
|
160 |
- DACHER
|
161 |
"""
|
162 |
+
return tuple(random.sample(hume_config.voice_names, 2))
|
src/theme.py
CHANGED
@@ -14,27 +14,28 @@ from collections.abc import Iterable
|
|
14 |
from gradio.themes.base import Base
|
15 |
from gradio.themes.utils import colors, fonts, sizes
|
16 |
|
|
|
17 |
class CustomTheme(Base):
|
18 |
def __init__(
|
19 |
self,
|
20 |
*,
|
21 |
-
primary_hue: colors.Color | str = colors.purple,
|
22 |
-
secondary_hue: colors.Color | str = colors.stone,
|
23 |
-
neutral_hue: colors.Color | str = colors.neutral,
|
24 |
spacing_size: sizes.Size | str = sizes.spacing_md,
|
25 |
radius_size: sizes.Size | str = sizes.radius_md,
|
26 |
text_size: sizes.Size | str = sizes.text_md,
|
27 |
font: fonts.Font | str | Iterable[fonts.Font | str] = (
|
28 |
-
fonts.GoogleFont(
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
),
|
33 |
font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
|
34 |
-
fonts.GoogleFont(
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
),
|
39 |
):
|
40 |
super().__init__(
|
@@ -47,56 +48,49 @@ class CustomTheme(Base):
|
|
47 |
font=font,
|
48 |
font_mono=font_mono,
|
49 |
)
|
50 |
-
self.name =
|
51 |
super().set(
|
52 |
# --- Colors ---
|
53 |
-
error_background_fill=
|
54 |
# error_background_fill_dark='',
|
55 |
-
error_border_color=
|
56 |
# error_border_color_dark='',
|
57 |
-
error_icon_color=
|
58 |
# error_icon_color_dark='',
|
59 |
-
input_background_fill=
|
60 |
# input_background_fill_dark='',
|
61 |
-
|
62 |
# --- Shadows ---
|
63 |
-
input_shadow_focus=
|
64 |
# input_shadow_focus_dark='',
|
65 |
-
|
66 |
# --- Gradients ---
|
67 |
-
stat_background_fill=
|
68 |
# stat_background_fill_dark='',
|
69 |
-
|
70 |
# --- Button borders ---
|
71 |
-
button_border_width=
|
72 |
-
input_border_width=
|
73 |
-
|
74 |
# --- Primary Button ---
|
75 |
-
button_primary_background_fill=
|
76 |
# button_primary_background_fill_dark='',
|
77 |
-
button_primary_background_fill_hover=
|
78 |
# button_primary_background_fill_hover_dark='',
|
79 |
-
button_primary_text_color=
|
80 |
# button_primary_text_color_dark='',
|
81 |
-
|
82 |
# --- Secondary Button ---
|
83 |
-
button_secondary_background_fill=
|
84 |
# button_secondary_background_fill_dark='#4B5563',
|
85 |
-
button_secondary_background_fill_hover=
|
86 |
# button_secondary_background_fill_hover_dark='#374151',
|
87 |
-
button_secondary_text_color=
|
88 |
# button_secondary_text_color_dark='#FFFFFF',
|
89 |
-
|
90 |
# --- Cancel Button ---
|
91 |
-
button_cancel_background_fill=
|
92 |
# button_cancel_background_fill_dark='#B91C1C',
|
93 |
-
button_cancel_background_fill_hover=
|
94 |
# button_cancel_background_fill_hover_dark='#991B1B',
|
95 |
-
button_cancel_text_color=
|
96 |
# button_cancel_text_color_dark='#FFFFFF',
|
97 |
-
button_cancel_text_color_hover=
|
98 |
# button_cancel_text_color_hover_dark='#FFFFFF',
|
99 |
-
|
100 |
# --- Other ---
|
101 |
-
border_color_accent_subdued=
|
102 |
-
)
|
|
|
14 |
from gradio.themes.base import Base
|
15 |
from gradio.themes.utils import colors, fonts, sizes
|
16 |
|
17 |
+
|
18 |
class CustomTheme(Base):
|
19 |
def __init__(
|
20 |
self,
|
21 |
*,
|
22 |
+
primary_hue: colors.Color | str = colors.purple,
|
23 |
+
secondary_hue: colors.Color | str = colors.stone,
|
24 |
+
neutral_hue: colors.Color | str = colors.neutral,
|
25 |
spacing_size: sizes.Size | str = sizes.spacing_md,
|
26 |
radius_size: sizes.Size | str = sizes.radius_md,
|
27 |
text_size: sizes.Size | str = sizes.text_md,
|
28 |
font: fonts.Font | str | Iterable[fonts.Font | str] = (
|
29 |
+
fonts.GoogleFont("Source Sans Pro"),
|
30 |
+
"ui-sans-serif",
|
31 |
+
"system-ui",
|
32 |
+
"sans-serif",
|
33 |
),
|
34 |
font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
|
35 |
+
fonts.GoogleFont("IBM Plex Mono"),
|
36 |
+
"ui-monospace",
|
37 |
+
"Consolas",
|
38 |
+
"monospace",
|
39 |
),
|
40 |
):
|
41 |
super().__init__(
|
|
|
48 |
font=font,
|
49 |
font_mono=font_mono,
|
50 |
)
|
51 |
+
self.name = "custom_theme"
|
52 |
super().set(
|
53 |
# --- Colors ---
|
54 |
+
error_background_fill="#EF4444",
|
55 |
# error_background_fill_dark='',
|
56 |
+
error_border_color="#B91C1C",
|
57 |
# error_border_color_dark='',
|
58 |
+
error_icon_color="#B91C1C",
|
59 |
# error_icon_color_dark='',
|
60 |
+
input_background_fill="#F9FAFB",
|
61 |
# input_background_fill_dark='',
|
|
|
62 |
# --- Shadows ---
|
63 |
+
input_shadow_focus="0 0 0 *shadow_spread #7C3AED80, *shadow_inset",
|
64 |
# input_shadow_focus_dark='',
|
|
|
65 |
# --- Gradients ---
|
66 |
+
stat_background_fill="linear-gradient(to right, #7C3AED, #D8B4FE)",
|
67 |
# stat_background_fill_dark='',
|
|
|
68 |
# --- Button borders ---
|
69 |
+
button_border_width="0px",
|
70 |
+
input_border_width="1px",
|
|
|
71 |
# --- Primary Button ---
|
72 |
+
button_primary_background_fill="#7E22CE",
|
73 |
# button_primary_background_fill_dark='',
|
74 |
+
button_primary_background_fill_hover="#9333EA",
|
75 |
# button_primary_background_fill_hover_dark='',
|
76 |
+
button_primary_text_color="#FFFFFF",
|
77 |
# button_primary_text_color_dark='',
|
|
|
78 |
# --- Secondary Button ---
|
79 |
+
button_secondary_background_fill="#222222",
|
80 |
# button_secondary_background_fill_dark='#4B5563',
|
81 |
+
button_secondary_background_fill_hover="#3F3F3F",
|
82 |
# button_secondary_background_fill_hover_dark='#374151',
|
83 |
+
button_secondary_text_color="#FFFFFF",
|
84 |
# button_secondary_text_color_dark='#FFFFFF',
|
|
|
85 |
# --- Cancel Button ---
|
86 |
+
button_cancel_background_fill="#EF4444",
|
87 |
# button_cancel_background_fill_dark='#B91C1C',
|
88 |
+
button_cancel_background_fill_hover="#DC2626",
|
89 |
# button_cancel_background_fill_hover_dark='#991B1B',
|
90 |
+
button_cancel_text_color="#FFFFFF",
|
91 |
# button_cancel_text_color_dark='#FFFFFF',
|
92 |
+
button_cancel_text_color_hover="#FFFFFF",
|
93 |
# button_cancel_text_color_hover_dark='#FFFFFF',
|
|
|
94 |
# --- Other ---
|
95 |
+
border_color_accent_subdued="#A78BFA",
|
96 |
+
)
|
src/types.py
CHANGED
@@ -9,7 +9,7 @@ has a consistent structure including both the provider and the associated voice.
|
|
9 |
from typing import TypedDict, Literal, Dict
|
10 |
|
11 |
|
12 |
-
TTSProviderName = Literal[
|
13 |
"""TTSProviderName represents the allowed provider names for TTS services."""
|
14 |
|
15 |
|
@@ -21,14 +21,15 @@ class OptionDetails(TypedDict):
|
|
21 |
provider (TTSProviderName): The name of the provider (either 'Hume AI' or 'ElevenLabs').
|
22 |
voice (str): The name of the voice associated with the option.
|
23 |
"""
|
|
|
24 |
provider: TTSProviderName
|
25 |
voice: str
|
26 |
|
27 |
|
28 |
-
OptionKey = Literal[
|
29 |
"""OptionKey is restricted to the literal values 'Option A' or 'Option B'."""
|
30 |
|
31 |
|
32 |
OptionMap = Dict[OptionKey, OptionDetails]
|
33 |
"""OptionMap defines the structure of the options mapping, where each key is an OptionKey
|
34 |
-
and the value is an OptionDetails dictionary."""
|
|
|
9 |
from typing import TypedDict, Literal, Dict
|
10 |
|
11 |
|
12 |
+
TTSProviderName = Literal["Hume AI", "ElevenLabs", "Unknown"]
|
13 |
"""TTSProviderName represents the allowed provider names for TTS services."""
|
14 |
|
15 |
|
|
|
21 |
provider (TTSProviderName): The name of the provider (either 'Hume AI' or 'ElevenLabs').
|
22 |
voice (str): The name of the voice associated with the option.
|
23 |
"""
|
24 |
+
|
25 |
provider: TTSProviderName
|
26 |
voice: str
|
27 |
|
28 |
|
29 |
+
OptionKey = Literal["Option A", "Option B"]
|
30 |
"""OptionKey is restricted to the literal values 'Option A' or 'Option B'."""
|
31 |
|
32 |
|
33 |
OptionMap = Dict[OptionKey, OptionDetails]
|
34 |
"""OptionMap defines the structure of the options mapping, where each key is an OptionKey
|
35 |
+
and the value is an OptionDetails dictionary."""
|
src/utils.py
CHANGED
@@ -37,14 +37,14 @@ def truncate_text(text: str, max_length: int = 50) -> str:
|
|
37 |
''
|
38 |
"""
|
39 |
if max_length <= 0:
|
40 |
-
logger.warning(f
|
41 |
-
return
|
42 |
|
43 |
is_truncated = len(text) > max_length
|
44 |
if is_truncated:
|
45 |
-
logger.debug(f
|
46 |
-
|
47 |
-
return text[:max_length] + (
|
48 |
|
49 |
|
50 |
def validate_env_var(var_name: str) -> str:
|
@@ -59,7 +59,7 @@ def validate_env_var(var_name: str) -> str:
|
|
59 |
|
60 |
Raises:
|
61 |
ValueError: If the environment variable is not set.
|
62 |
-
|
63 |
Examples:
|
64 |
>>> import os
|
65 |
>>> os.environ["EXAMPLE_VAR"] = "example_value"
|
@@ -71,9 +71,11 @@ def validate_env_var(var_name: str) -> str:
|
|
71 |
...
|
72 |
ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
|
73 |
"""
|
74 |
-
value = os.environ.get(var_name,
|
75 |
if not value:
|
76 |
-
raise ValueError(
|
|
|
|
|
77 |
return value
|
78 |
|
79 |
|
@@ -98,17 +100,19 @@ def validate_prompt_length(prompt: str, max_length: int, min_length: int) -> Non
|
|
98 |
"""
|
99 |
stripped_prompt = prompt.strip()
|
100 |
prompt_length = len(stripped_prompt)
|
101 |
-
|
102 |
-
logger.debug(f
|
103 |
|
104 |
if prompt_length < min_length:
|
105 |
raise ValueError(
|
106 |
-
f
|
107 |
-
f
|
108 |
)
|
109 |
if prompt_length > max_length:
|
110 |
raise ValueError(
|
111 |
-
f
|
112 |
-
f
|
113 |
)
|
114 |
-
logger.debug(
|
|
|
|
|
|
37 |
''
|
38 |
"""
|
39 |
if max_length <= 0:
|
40 |
+
logger.warning(f"Invalid max_length={max_length}. Returning empty string.")
|
41 |
+
return ""
|
42 |
|
43 |
is_truncated = len(text) > max_length
|
44 |
if is_truncated:
|
45 |
+
logger.debug(f"Truncated text to {max_length} characters.")
|
46 |
+
|
47 |
+
return text[:max_length] + ("..." if is_truncated else "")
|
48 |
|
49 |
|
50 |
def validate_env_var(var_name: str) -> str:
|
|
|
59 |
|
60 |
Raises:
|
61 |
ValueError: If the environment variable is not set.
|
62 |
+
|
63 |
Examples:
|
64 |
>>> import os
|
65 |
>>> os.environ["EXAMPLE_VAR"] = "example_value"
|
|
|
71 |
...
|
72 |
ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
|
73 |
"""
|
74 |
+
value = os.environ.get(var_name, "")
|
75 |
if not value:
|
76 |
+
raise ValueError(
|
77 |
+
f"{var_name} is not set. Please ensure it is defined in your environment variables."
|
78 |
+
)
|
79 |
return value
|
80 |
|
81 |
|
|
|
100 |
"""
|
101 |
stripped_prompt = prompt.strip()
|
102 |
prompt_length = len(stripped_prompt)
|
103 |
+
|
104 |
+
logger.debug(f"Prompt length being validated: {prompt_length} characters")
|
105 |
|
106 |
if prompt_length < min_length:
|
107 |
raise ValueError(
|
108 |
+
f"Your prompt is too short. Please enter at least {min_length} characters. "
|
109 |
+
f"(Current length: {prompt_length})"
|
110 |
)
|
111 |
if prompt_length > max_length:
|
112 |
raise ValueError(
|
113 |
+
f"Your prompt is too long. Please limit it to {max_length} characters. "
|
114 |
+
f"(Current length: {prompt_length})"
|
115 |
)
|
116 |
+
logger.debug(
|
117 |
+
f"Prompt length validation passed for prompt: {truncate_text(stripped_prompt)}"
|
118 |
+
)
|