Spaces:

HumeAI
/

expressive-tts-arena

Running

App Files Community

zach committed on Feb 3

Commit

d1ed6b1

1 Parent(s): 84c63d1

Add formatter (black) and format all code in the src directory, using black's default formatting configuration

Browse files

Files changed (12) hide show

requirements.txt +4 -0
src/__init__.py +1 -1
src/app.py +164 -111
src/config.py +5 -5
src/constants.py +27 -36
src/integrations/__init__.py +1 -1
src/integrations/anthropic_api.py +30 -22
src/integrations/elevenlabs_api.py +32 -23
src/integrations/hume_api.py +45 -29
src/theme.py +33 -39
src/types.py +4 -3
src/utils.py +19 -15

requirements.txt CHANGED Viewed

@@ -2,6 +2,7 @@ aiofiles==23.2.1
 annotated-types==0.7.0
 anthropic==0.45.2
 anyio==4.8.0
 certifi==2024.12.14
 charset-normalizer==3.4.1
 click==8.1.8
@@ -23,11 +24,14 @@ jiter==0.8.2
 markdown-it-py==3.0.0
 MarkupSafe==2.1.5
 mdurl==0.1.2
 numpy==2.2.2
 orjson==3.10.15
 packaging==24.2
 pandas==2.2.3
 pillow==11.1.0
 pydantic==2.10.6
 pydantic_core==2.27.2
 pydub==0.25.1

 annotated-types==0.7.0
 anthropic==0.45.2
 anyio==4.8.0
+black==25.1.0
 certifi==2024.12.14
 charset-normalizer==3.4.1
 click==8.1.8
 markdown-it-py==3.0.0
 MarkupSafe==2.1.5
 mdurl==0.1.2
+mypy-extensions==1.0.0
 numpy==2.2.2
 orjson==3.10.15
 packaging==24.2
 pandas==2.2.3
+pathspec==0.12.1
 pillow==11.1.0
+platformdirs==4.3.6
 pydantic==2.10.6
 pydantic_core==2.27.2
 pydub==0.25.1

src/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 """
 The root package. This package includes the Gradio application,
 integrations with external APIs, and utility functions.
-"""

 """
 The root package. This package includes the Gradio application,
 integrations with external APIs, and utility functions.
+"""

src/app.py CHANGED Viewed

@@ -21,10 +21,10 @@ from src.config import logger
 from src.constants import (
     ELEVENLABS,
     HUME_AI,
-    OPTION_A,
     OPTION_B,
-    PROMPT_MAX_LENGTH,
-    PROMPT_MIN_LENGTH,
     SAMPLE_PROMPTS,
     TROPHY_EMOJI,
     UNKNOWN_PROVIDER,
@@ -45,7 +45,9 @@ from src.types import OptionMap
 from src.utils import truncate_text, validate_prompt_length
-def generate_text(prompt: str,) -> Tuple[Union[str, gr.update], gr.update]:
     """
     Validates the prompt and generates text using Anthropic API.
@@ -56,29 +58,33 @@ def generate_text(prompt: str,) -> Tuple[Union[str, gr.update], gr.update]:
         Tuple containing:
           - The generated text (as a gr.update).
           - An update for the generated text state.
     Raises:
         gr.Error: On validation or API errors.
     """
     try:
         validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
     except ValueError as ve:
-        logger.warning(f'Validation error: {ve}')
         raise gr.Error(str(ve))
     try:
         generated_text = generate_text_with_claude(prompt)
-        logger.info(f'Generated text ({len(generated_text)} characters).')
         return gr.update(value=generated_text), generated_text
     except AnthropicError as ae:
-        logger.error(f'AnthropicError while generating text: {str(ae)}')
-        raise gr.Error('There was an issue communicating with the Anthropic API. Please try again later.')
     except Exception as e:
-        logger.error(f'Unexpected error while generating text: {e}')
-        raise gr.Error('Failed to generate text. Please try again.')
-def text_to_speech(prompt: str, text: str, generated_text_state: str) -> Tuple[gr.update, gr.update, dict, Union[str, None]]:
     """
     Synthesizes two text to speech outputs, loads the two audio players with the
     output audio, and updates related UI state components.
@@ -95,46 +101,51 @@ def text_to_speech(prompt: str, text: str, generated_text_state: str) -> Tuple[g
          - Update for second audio player
          - A dictionary mapping options to providers
          - The raw audio value for option B
     Raises:
         gr.Error: On API or unexpected errors.
     """
     if not text:
-        logger.warning('Skipping text-to-speech due to empty text.')
-        raise gr.Error('Please generate or enter text to synthesize.')
     # If not using generated text, then only compare Hume to Hume
-    compare_hume_with_elevenlabs = (text == generated_text_state) and (random.random() < 0.5)
     # Pre-select two Hume voices pre-emptively in case we compare Hume to Hume to ensure we do not select the same voice twice.
     hume_voice_a, hume_voice_b = get_random_hume_voice_names()
     try:
         with ThreadPoolExecutor(max_workers=2) as executor:
             provider_a = HUME_AI
-            future_audio_a = executor.submit(text_to_speech_with_hume, prompt, text, hume_voice_a)
             if compare_hume_with_elevenlabs:
                 provider_b = ELEVENLABS
                 future_audio_b = executor.submit(text_to_speech_with_elevenlabs, text)
             else:
                 provider_b = HUME_AI
-                future_audio_b = executor.submit(text_to_speech_with_hume, prompt, text, hume_voice_b)
             voice_a, audio_a = future_audio_a.result()
             voice_b, audio_b = future_audio_b.result()
-        logger.info(f'TTS generated: {provider_a}={len(audio_a)} bytes, {provider_b}={len(audio_b)} bytes')
         options = [
             (audio_a, {"provider": provider_a, "voice": voice_a}),
-            (audio_b, {"provider": provider_b, "voice": voice_b})
         ]
         random.shuffle(options)
         option_a_audio, option_b_audio = options[0][0], options[1][0]
-        options_map: OptionMap = {
-            OPTION_A: options[0][1],
-            OPTION_B: options[1][1]
-        }
         return (
             gr.update(value=option_a_audio, visible=True, autoplay=True),
@@ -143,17 +154,23 @@ def text_to_speech(prompt: str, text: str, generated_text_state: str) -> Tuple[g
             option_b_audio,
         )
     except ElevenLabsError as ee:
-        logger.error(f'ElevenLabsError while synthesizing speech from text: {str(ee)}')
-        raise gr.Error('There was an issue communicating with the Elevenlabs API. Please try again later.')
     except HumeError as he:
-        logger.error(f'HumeError while synthesizing speech from text: {str(he)}')
-        raise gr.Error('There was an issue communicating with the Hume API. Please try again later.')
     except Exception as e:
-        logger.error(f'Unexpected error during TTS generation: {e}')
-        raise gr.Error('An unexpected error ocurred. Please try again later.')
-def vote(vote_submitted: bool, option_map: OptionMap, selected_button: str) -> Tuple[bool, gr.update, gr.update, gr.update]:
     """
     Handles user voting.
@@ -178,17 +195,19 @@ def vote(vote_submitted: bool, option_map: OptionMap, selected_button: str) -> T
         return gr.skip(), gr.skip(), gr.skip(), gr.skip()
     option_a_selected = selected_button == VOTE_FOR_OPTION_A
-    selected_option, other_option = (OPTION_A, OPTION_B) if option_a_selected else (OPTION_B, OPTION_A)
     # Parse selected option details from options map
     selected_details = option_map.get(selected_option, {})
-    selected_provider = selected_details.get('provider', UNKNOWN_PROVIDER)
-    selected_voice = selected_details.get('voice', '')
     # Parse other option details from options map
     other_details = option_map.get(other_option, {})
-    other_provider = other_details.get('provider', UNKNOWN_PROVIDER)
-    other_voice = other_details.get('voice', '')
     # Build button labels, displaying the provider and voice name, appending the trophy emoji to the selected option.
     selected_label = f"{selected_provider} | Voice: {selected_voice} {TROPHY_EMOJI}"
@@ -196,11 +215,17 @@ def vote(vote_submitted: bool, option_map: OptionMap, selected_button: str) -> T
     return (
         True,
-        gr.update(value=selected_label, variant='primary', interactive=False) if option_a_selected
-            else gr.update(value=other_label, variant='secondary', interactive=False),
-        gr.update(value=other_label, variant='secondary', interactive=False) if option_a_selected
-            else gr.update(value=selected_label, variant='primary', interactive=False),
-        gr.update(interactive=True)
     )
@@ -221,8 +246,8 @@ def reset_ui() -> Tuple[gr.update, gr.update, gr.update, gr.update, None, None,
     return (
         gr.update(value=None),
         gr.update(value=None),
-        gr.update(value=VOTE_FOR_OPTION_A, variant='secondary'),
-        gr.update(value=VOTE_FOR_OPTION_B, variant='secondary'),
         None,
         None,
         False,
@@ -230,41 +255,45 @@ def reset_ui() -> Tuple[gr.update, gr.update, gr.update, gr.update, None, None,
 def build_input_section() -> Tuple[gr.Markdown, gr.Dropdown, gr.Textbox, gr.Button]:
-    """ Builds the input section including instructions, sample prompt dropdown, prompt input, and generate button """
-    instructions = gr.Markdown("""
         1. **Enter or Generate Text:** Type directly in the Text box, or optionally enter a Prompt, click "Generate text", and edit if needed.
         2. **Synthesize Speech:** Click "Synthesize speech" to generate two audio outputs.
         3. **Listen & Compare:** Playback both options (A & B) to hear the differences.
         4. **Vote for Your Favorite:** Click "Vote for option A" or "Vote for option B" to choose the best one.
-    """)
     sample_prompt_dropdown = gr.Dropdown(
         choices=list(SAMPLE_PROMPTS.keys()),
-        label='Choose a sample prompt (or enter your own)',
         value=None,
         interactive=True,
     )
     prompt_input = gr.Textbox(
-        label='Prompt',
-        placeholder='Enter your prompt...',
         lines=2,
         max_lines=2,
         max_length=PROMPT_MAX_LENGTH,
         show_copy_button=True,
     )
-    generate_text_button = gr.Button('Generate text', variant='secondary')
     return (
-        instructions,
-        sample_prompt_dropdown,
-        prompt_input,
         generate_text_button,
     )
-def build_output_section() -> Tuple[gr.Textbox, gr.Button, gr.Audio, gr.Audio, gr.Button, gr.Button]:
-    """ Builds the output section including generated text, audio players, and vote buttons. """
     text_input = gr.Textbox(
-        label='Text',
-        placeholder='Enter text to synthesize speech...',
         interactive=True,
         autoscroll=False,
         lines=4,
@@ -272,19 +301,23 @@ def build_output_section() -> Tuple[gr.Textbox, gr.Button, gr.Audio, gr.Audio, g
         max_length=PROMPT_MAX_LENGTH,
         show_copy_button=True,
     )
-    synthesize_speech_button = gr.Button('Synthesize speech', variant='primary')
     with gr.Row(equal_height=True):
-        option_a_audio_player = gr.Audio(label=OPTION_A, type='filepath', interactive=False)
-        option_b_audio_player = gr.Audio(label=OPTION_B, type='filepath', interactive=False)
     with gr.Row(equal_height=True):
         vote_button_a = gr.Button(VOTE_FOR_OPTION_A, interactive=False)
         vote_button_b = gr.Button(VOTE_FOR_OPTION_B, interactive=False)
     return (
-        text_input,
-        synthesize_speech_button,
-        option_a_audio_player,
-        option_b_audio_player,
-        vote_button_a,
         vote_button_b,
     )
@@ -298,39 +331,45 @@ def build_gradio_interface() -> gr.Blocks:
     """
     custom_theme = CustomTheme()
     with gr.Blocks(
-        title='Expressive TTS Arena',
-        theme=custom_theme,
-        fill_width=True,
-        css_paths='src/assets/styles.css',
     ) as demo:
         # Title
-        gr.Markdown('# Expressive TTS Arena')
         # Build generate text section
-        (instructions,
-        sample_prompt_dropdown,
-        prompt_input,
-        generate_text_button) = build_input_section()
         # Build synthesize speech section
-        (text_input,
-        synthesize_speech_button,
-        option_a_audio_player,
-        option_b_audio_player,
-        vote_button_a,
-        vote_button_b) = build_output_section()
-        # UI state components
-        generated_text_state = gr.State('')     # Track generated text state
-        option_b_audio_state = gr.State()       # Track generated audio for option B for playing automatically after option 1 audio finishes
-        option_map_state = gr.State()           # Track option map (option A and option B are randomized)
-        vote_submitted_state = gr.State(False)  # Track whether the user has voted for an option
         # --- Register event handlers ---
         # When a sample prompt is chosen, update the prompt textbox
         sample_prompt_dropdown.change(
-            fn=lambda choice: SAMPLE_PROMPTS.get(choice, ''),
             inputs=[sample_prompt_dropdown],
             outputs=[prompt_input],
         )
@@ -342,7 +381,7 @@ def build_gradio_interface() -> gr.Blocks:
         generate_text_button.click(
             fn=lambda: gr.update(interactive=False),
             inputs=[],
-            outputs=[generate_text_button]
         ).then(
             fn=generate_text,
             inputs=[prompt_input],
@@ -350,9 +389,9 @@ def build_gradio_interface() -> gr.Blocks:
         ).then(
             fn=lambda: gr.update(interactive=True),
             inputs=[],
-            outputs=[generate_text_button]
         )
         # Synthesize speech button click event handler chain:
         # 1. Disable the "Synthesize speech" button
         # 2. Reset UI state
@@ -361,48 +400,58 @@ def build_gradio_interface() -> gr.Blocks:
         synthesize_speech_button.click(
             fn=lambda: gr.update(interactive=False),
             inputs=[],
-            outputs=[synthesize_speech_button]
         ).then(
             fn=reset_ui,
             inputs=[],
             outputs=[
                 option_a_audio_player,
                 option_b_audio_player,
-                vote_button_a,
-                vote_button_b,
-                option_map_state,
-                option_b_audio_state,
                 vote_submitted_state,
             ],
         ).then(
             fn=text_to_speech,
             inputs=[prompt_input, text_input, generated_text_state],
             outputs=[
-                option_a_audio_player,
-                option_b_audio_player,
-                option_map_state,
                 option_b_audio_state,
             ],
         ).then(
             fn=lambda: (
-                gr.update(interactive=True),
-                gr.update(interactive=True),
-                gr.update(interactive=True)
             ),
             inputs=[],
-            outputs=[synthesize_speech_button, vote_button_a, vote_button_b]
         )
         # Vote button click event handlers
         vote_button_a.click(
             fn=vote,
             inputs=[vote_submitted_state, option_map_state, vote_button_a],
-            outputs=[vote_submitted_state, vote_button_a, vote_button_b, synthesize_speech_button],
         )
         vote_button_b.click(
             fn=vote,
             inputs=[vote_submitted_state, option_map_state, vote_button_b],
-            outputs=[vote_submitted_state, vote_button_a, vote_button_b, synthesize_speech_button],
         )
         # Auto-play second audio after first finishes (Workaround to play audio back-to-back)
@@ -421,16 +470,20 @@ def build_gradio_interface() -> gr.Blocks:
         # Enable voting after second audio option playback finishes
         option_b_audio_player.stop(
-            fn=lambda _: (gr.update(interactive=True), gr.update(interactive=True), gr.update(autoplay=False)),
             inputs=[],
             outputs=[vote_button_a, vote_button_b, option_b_audio_player],
         )
-    logger.debug('Gradio interface built successfully')
     return demo
-if __name__ == '__main__':
-    logger.info('Launching TTS Arena Gradio app...')
     demo = build_gradio_interface()
-    demo.launch()

 from src.constants import (
     ELEVENLABS,
     HUME_AI,
+    OPTION_A,
     OPTION_B,
+    PROMPT_MAX_LENGTH,
+    PROMPT_MIN_LENGTH,
     SAMPLE_PROMPTS,
     TROPHY_EMOJI,
     UNKNOWN_PROVIDER,
 from src.utils import truncate_text, validate_prompt_length
+def generate_text(
+    prompt: str,
+) -> Tuple[Union[str, gr.update], gr.update]:
     """
     Validates the prompt and generates text using Anthropic API.
         Tuple containing:
           - The generated text (as a gr.update).
           - An update for the generated text state.
     Raises:
         gr.Error: On validation or API errors.
     """
     try:
         validate_prompt_length(prompt, PROMPT_MAX_LENGTH, PROMPT_MIN_LENGTH)
     except ValueError as ve:
+        logger.warning(f"Validation error: {ve}")
         raise gr.Error(str(ve))
     try:
         generated_text = generate_text_with_claude(prompt)
+        logger.info(f"Generated text ({len(generated_text)} characters).")
         return gr.update(value=generated_text), generated_text
     except AnthropicError as ae:
+        logger.error(f"AnthropicError while generating text: {str(ae)}")
+        raise gr.Error(
+            "There was an issue communicating with the Anthropic API. Please try again later."
+        )
     except Exception as e:
+        logger.error(f"Unexpected error while generating text: {e}")
+        raise gr.Error("Failed to generate text. Please try again.")
+def text_to_speech(
+    prompt: str, text: str, generated_text_state: str
+) -> Tuple[gr.update, gr.update, dict, Union[str, None]]:
     """
     Synthesizes two text to speech outputs, loads the two audio players with the
     output audio, and updates related UI state components.
          - Update for second audio player
          - A dictionary mapping options to providers
          - The raw audio value for option B
     Raises:
         gr.Error: On API or unexpected errors.
     """
     if not text:
+        logger.warning("Skipping text-to-speech due to empty text.")
+        raise gr.Error("Please generate or enter text to synthesize.")
     # If not using generated text, then only compare Hume to Hume
+    compare_hume_with_elevenlabs = (text == generated_text_state) and (
+        random.random() < 0.5
+    )
     # Pre-select two Hume voices pre-emptively in case we compare Hume to Hume to ensure we do not select the same voice twice.
     hume_voice_a, hume_voice_b = get_random_hume_voice_names()
     try:
         with ThreadPoolExecutor(max_workers=2) as executor:
             provider_a = HUME_AI
+            future_audio_a = executor.submit(
+                text_to_speech_with_hume, prompt, text, hume_voice_a
+            )
             if compare_hume_with_elevenlabs:
                 provider_b = ELEVENLABS
                 future_audio_b = executor.submit(text_to_speech_with_elevenlabs, text)
             else:
                 provider_b = HUME_AI
+                future_audio_b = executor.submit(
+                    text_to_speech_with_hume, prompt, text, hume_voice_b
+                )
             voice_a, audio_a = future_audio_a.result()
             voice_b, audio_b = future_audio_b.result()
+        logger.info(
+            f"TTS generated: {provider_a}={len(audio_a)} bytes, {provider_b}={len(audio_b)} bytes"
+        )
         options = [
             (audio_a, {"provider": provider_a, "voice": voice_a}),
+            (audio_b, {"provider": provider_b, "voice": voice_b}),
         ]
         random.shuffle(options)
         option_a_audio, option_b_audio = options[0][0], options[1][0]
+        options_map: OptionMap = {OPTION_A: options[0][1], OPTION_B: options[1][1]}
         return (
             gr.update(value=option_a_audio, visible=True, autoplay=True),
             option_b_audio,
         )
     except ElevenLabsError as ee:
+        logger.error(f"ElevenLabsError while synthesizing speech from text: {str(ee)}")
+        raise gr.Error(
+            "There was an issue communicating with the Elevenlabs API. Please try again later."
+        )
     except HumeError as he:
+        logger.error(f"HumeError while synthesizing speech from text: {str(he)}")
+        raise gr.Error(
+            "There was an issue communicating with the Hume API. Please try again later."
+        )
     except Exception as e:
+        logger.error(f"Unexpected error during TTS generation: {e}")
+        raise gr.Error("An unexpected error ocurred. Please try again later.")
+def vote(
+    vote_submitted: bool, option_map: OptionMap, selected_button: str
+) -> Tuple[bool, gr.update, gr.update, gr.update]:
     """
     Handles user voting.
         return gr.skip(), gr.skip(), gr.skip(), gr.skip()
     option_a_selected = selected_button == VOTE_FOR_OPTION_A
+    selected_option, other_option = (
+        (OPTION_A, OPTION_B) if option_a_selected else (OPTION_B, OPTION_A)
+    )
     # Parse selected option details from options map
     selected_details = option_map.get(selected_option, {})
+    selected_provider = selected_details.get("provider", UNKNOWN_PROVIDER)
+    selected_voice = selected_details.get("voice", "")
     # Parse other option details from options map
     other_details = option_map.get(other_option, {})
+    other_provider = other_details.get("provider", UNKNOWN_PROVIDER)
+    other_voice = other_details.get("voice", "")
     # Build button labels, displaying the provider and voice name, appending the trophy emoji to the selected option.
     selected_label = f"{selected_provider} | Voice: {selected_voice} {TROPHY_EMOJI}"
     return (
         True,
+        (
+            gr.update(value=selected_label, variant="primary", interactive=False)
+            if option_a_selected
+            else gr.update(value=other_label, variant="secondary", interactive=False)
+        ),
+        (
+            gr.update(value=other_label, variant="secondary", interactive=False)
+            if option_a_selected
+            else gr.update(value=selected_label, variant="primary", interactive=False)
+        ),
+        gr.update(interactive=True),
     )
     return (
         gr.update(value=None),
         gr.update(value=None),
+        gr.update(value=VOTE_FOR_OPTION_A, variant="secondary"),
+        gr.update(value=VOTE_FOR_OPTION_B, variant="secondary"),
         None,
         None,
         False,
 def build_input_section() -> Tuple[gr.Markdown, gr.Dropdown, gr.Textbox, gr.Button]:
+    """Builds the input section including instructions, sample prompt dropdown, prompt input, and generate button"""
+    instructions = gr.Markdown(
+        """
         1. **Enter or Generate Text:** Type directly in the Text box, or optionally enter a Prompt, click "Generate text", and edit if needed.
         2. **Synthesize Speech:** Click "Synthesize speech" to generate two audio outputs.
         3. **Listen & Compare:** Playback both options (A & B) to hear the differences.
         4. **Vote for Your Favorite:** Click "Vote for option A" or "Vote for option B" to choose the best one.
+    """
+    )
     sample_prompt_dropdown = gr.Dropdown(
         choices=list(SAMPLE_PROMPTS.keys()),
+        label="Choose a sample prompt (or enter your own)",
         value=None,
         interactive=True,
     )
     prompt_input = gr.Textbox(
+        label="Prompt",
+        placeholder="Enter your prompt...",
         lines=2,
         max_lines=2,
         max_length=PROMPT_MAX_LENGTH,
         show_copy_button=True,
     )
+    generate_text_button = gr.Button("Generate text", variant="secondary")
     return (
+        instructions,
+        sample_prompt_dropdown,
+        prompt_input,
         generate_text_button,
     )
+def build_output_section() -> (
+    Tuple[gr.Textbox, gr.Button, gr.Audio, gr.Audio, gr.Button, gr.Button]
+):
+    """Builds the output section including generated text, audio players, and vote buttons."""
     text_input = gr.Textbox(
+        label="Text",
+        placeholder="Enter text to synthesize speech...",
         interactive=True,
         autoscroll=False,
         lines=4,
         max_length=PROMPT_MAX_LENGTH,
         show_copy_button=True,
     )
+    synthesize_speech_button = gr.Button("Synthesize speech", variant="primary")
     with gr.Row(equal_height=True):
+        option_a_audio_player = gr.Audio(
+            label=OPTION_A, type="filepath", interactive=False
+        )
+        option_b_audio_player = gr.Audio(
+            label=OPTION_B, type="filepath", interactive=False
+        )
     with gr.Row(equal_height=True):
         vote_button_a = gr.Button(VOTE_FOR_OPTION_A, interactive=False)
         vote_button_b = gr.Button(VOTE_FOR_OPTION_B, interactive=False)
     return (
+        text_input,
+        synthesize_speech_button,
+        option_a_audio_player,
+        option_b_audio_player,
+        vote_button_a,
         vote_button_b,
     )
     """
     custom_theme = CustomTheme()
     with gr.Blocks(
+        title="Expressive TTS Arena",
+        theme=custom_theme,
+        fill_width=True,
+        css_paths="src/assets/styles.css",
     ) as demo:
         # Title
+        gr.Markdown("# Expressive TTS Arena")
         # Build generate text section
+        (instructions, sample_prompt_dropdown, prompt_input, generate_text_button) = (
+            build_input_section()
+        )
         # Build synthesize speech section
+        (
+            text_input,
+            synthesize_speech_button,
+            option_a_audio_player,
+            option_b_audio_player,
+            vote_button_a,
+            vote_button_b,
+        ) = build_output_section()
+        # --- UI state components ---
+        # Track generated text state
+        generated_text_state = gr.State("")
+        # Track generated audio for option B for playing automatically after option 1 audio finishes
+        option_b_audio_state = gr.State()
+        # Track option map (option A and option B are randomized)
+        option_map_state = gr.State()
+        # Track whether the user has voted for an option
+        vote_submitted_state = gr.State(False)
         # --- Register event handlers ---
         # When a sample prompt is chosen, update the prompt textbox
         sample_prompt_dropdown.change(
+            fn=lambda choice: SAMPLE_PROMPTS.get(choice, ""),
             inputs=[sample_prompt_dropdown],
             outputs=[prompt_input],
         )
         generate_text_button.click(
             fn=lambda: gr.update(interactive=False),
             inputs=[],
+            outputs=[generate_text_button],
         ).then(
             fn=generate_text,
             inputs=[prompt_input],
         ).then(
             fn=lambda: gr.update(interactive=True),
             inputs=[],
+            outputs=[generate_text_button],
         )
         # Synthesize speech button click event handler chain:
         # 1. Disable the "Synthesize speech" button
         # 2. Reset UI state
         synthesize_speech_button.click(
             fn=lambda: gr.update(interactive=False),
             inputs=[],
+            outputs=[synthesize_speech_button],
         ).then(
             fn=reset_ui,
             inputs=[],
             outputs=[
                 option_a_audio_player,
                 option_b_audio_player,
+                vote_button_a,
+                vote_button_b,
+                option_map_state,
+                option_b_audio_state,
                 vote_submitted_state,
             ],
         ).then(
             fn=text_to_speech,
             inputs=[prompt_input, text_input, generated_text_state],
             outputs=[
+                option_a_audio_player,
+                option_b_audio_player,
+                option_map_state,
                 option_b_audio_state,
             ],
         ).then(
             fn=lambda: (
+                gr.update(interactive=True),
+                gr.update(interactive=True),
+                gr.update(interactive=True),
             ),
             inputs=[],
+            outputs=[synthesize_speech_button, vote_button_a, vote_button_b],
         )
         # Vote button click event handlers
         vote_button_a.click(
             fn=vote,
             inputs=[vote_submitted_state, option_map_state, vote_button_a],
+            outputs=[
+                vote_submitted_state,
+                vote_button_a,
+                vote_button_b,
+                synthesize_speech_button,
+            ],
         )
         vote_button_b.click(
             fn=vote,
             inputs=[vote_submitted_state, option_map_state, vote_button_b],
+            outputs=[
+                vote_submitted_state,
+                vote_button_a,
+                vote_button_b,
+                synthesize_speech_button,
+            ],
         )
         # Auto-play second audio after first finishes (Workaround to play audio back-to-back)
         # Enable voting after second audio option playback finishes
         option_b_audio_player.stop(
+            fn=lambda _: (
+                gr.update(interactive=True),
+                gr.update(interactive=True),
+                gr.update(autoplay=False),
+            ),
             inputs=[],
             outputs=[vote_button_a, vote_button_b, option_b_audio_player],
         )
+    logger.debug("Gradio interface built successfully")
     return demo
+if __name__ == "__main__":
+    logger.info("Launching TTS Arena Gradio app...")
     demo = build_gradio_interface()
+    demo.launch()

src/config.py CHANGED Viewed

@@ -22,10 +22,10 @@ load_dotenv()
 # Enable debugging mode based on an environment variable
-debug_raw = os.getenv('DEBUG', 'false').lower()
-if debug_raw not in {'true', 'false'}:
     print(f'Warning: Invalid DEBUG value "{debug_raw}". Defaulting to "false".')
-DEBUG = debug_raw == 'true'
 # Configure the logger
@@ -33,8 +33,8 @@ logging.basicConfig(
     level=logging.DEBUG if DEBUG else logging.INFO,
     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
 )
-logger: logging.Logger = logging.getLogger('tts_arena')
 logger.info(f'Debug mode is {"enabled" if DEBUG else "disabled"}.')
 if DEBUG:
-    logger.debug(f'DEBUG mode enabled.')

 # Enable debugging mode based on an environment variable
+debug_raw = os.getenv("DEBUG", "false").lower()
+if debug_raw not in {"true", "false"}:
     print(f'Warning: Invalid DEBUG value "{debug_raw}". Defaulting to "false".')
+DEBUG = debug_raw == "true"
 # Configure the logger
     level=logging.DEBUG if DEBUG else logging.INFO,
     format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
 )
+logger: logging.Logger = logging.getLogger("tts_arena")
 logger.info(f'Debug mode is {"enabled" if DEBUG else "disabled"}.')
 if DEBUG:
+    logger.debug(f"DEBUG mode enabled.")

src/constants.py CHANGED Viewed

@@ -7,47 +7,38 @@ This module defines global constants used throughout the project.
 from src.types import OptionKey, TTSProviderName
 # UI constants
-HUME_AI: TTSProviderName = 'Hume AI'
-ELEVENLABS: TTSProviderName = 'ElevenLabs'
-UNKNOWN_PROVIDER: TTSProviderName = 'Unknown'
 PROMPT_MIN_LENGTH: int = 10
 PROMPT_MAX_LENGTH: int = 400
-OPTION_A: OptionKey = 'Option A'
-OPTION_B: OptionKey = 'Option B'
-TROPHY_EMOJI: str = '🏆'
-VOTE_FOR_OPTION_A: str = 'Vote for option A'
-VOTE_FOR_OPTION_B: str = 'Vote for option B'
-# A collection of pre-defined prompts categorized by theme, used to provide users with
 # inspiration for generating creative text for expressive TTS.
 SAMPLE_PROMPTS: dict = {
-    '🚀 Dramatic Monologue (Stranded Astronaut)':
-        'Write a short dramatic monologue from a lone astronaut stranded on Mars, speaking to '
-        'mission control for the last time. The tone should be reflective and filled with awe, conveying '
-        'resignation and finality. Describe the Martian landscape and their thoughts in a way that '
-        'would evoke emotion and depth.',
-    '📜 Poetic Sonnet (The Passage of Time)':
-        'Compose a concise sonnet about the passage of time, using vivid imagery and a flowing, '
-        'melodic rhythm. The poem should evoke the contrast between fleeting moments and eternity, '
-        'capturing both beauty and melancholy, with natural pacing for speech delivery.',
-    "🐱 Whimsical Children's Story (Talking Cat)":
-        'Tell a short, whimsical bedtime story about a mischievous talking cat who sneaks into a grand '
-        'wizard’s library at night and accidentally casts a spell that brings the books to life. Keep the '
-        'tone playful and filled with wonder, ensuring the language flows smoothly.',
-    '🔥 Intense Speech (Freedom & Justice)':
-        'Write a powerful, impassioned speech from a rebel leader rallying their people against a '
-        'tyrant. The speech should be urgent, filled with conviction, and call for freedom and justice, '
-        'making sure the emotional intensity is evident in the phrasing.',
-    '👻 Mysterious Horror Scene (Haunted Lighthouse)':
-        'Describe a chilling ghostly encounter in an abandoned lighthouse on a foggy night. The '
-        'protagonist, alone and cold, hears whispers from the shadows, telling them secrets they were '
-        'never meant to know. Use language that builds suspense and tension, ensuring it sounds '
-        'haunting and engaging.'
-}

 from src.types import OptionKey, TTSProviderName
 # UI constants
+HUME_AI: TTSProviderName = "Hume AI"
+ELEVENLABS: TTSProviderName = "ElevenLabs"
+UNKNOWN_PROVIDER: TTSProviderName = "Unknown"
 PROMPT_MIN_LENGTH: int = 10
 PROMPT_MAX_LENGTH: int = 400
+OPTION_A: OptionKey = "Option A"
+OPTION_B: OptionKey = "Option B"
+TROPHY_EMOJI: str = "🏆"
+VOTE_FOR_OPTION_A: str = "Vote for option A"
+VOTE_FOR_OPTION_B: str = "Vote for option B"
+# A collection of pre-defined prompts categorized by theme, used to provide users with
 # inspiration for generating creative text for expressive TTS.
 SAMPLE_PROMPTS: dict = {
+    # Each value is parenthesized so the implicitly concatenated string pieces
+    # read as one value rather than as separate dict entries missing commas.
+    "🚀 Dramatic Monologue (Stranded Astronaut)": (
+        "Write a short dramatic monologue from a lone astronaut stranded on Mars, speaking to "
+        "mission control for the last time. The tone should be reflective and filled with awe, conveying "
+        "resignation and finality. Describe the Martian landscape and their thoughts in a way that "
+        "would evoke emotion and depth."
+    ),
+    "📜 Poetic Sonnet (The Passage of Time)": (
+        "Compose a concise sonnet about the passage of time, using vivid imagery and a flowing, "
+        "melodic rhythm. The poem should evoke the contrast between fleeting moments and eternity, "
+        "capturing both beauty and melancholy, with natural pacing for speech delivery."
+    ),
+    "🐱 Whimsical Children's Story (Talking Cat)": (
+        "Tell a short, whimsical bedtime story about a mischievous talking cat who sneaks into a grand "
+        "wizard’s library at night and accidentally casts a spell that brings the books to life. Keep the "
+        "tone playful and filled with wonder, ensuring the language flows smoothly."
+    ),
+    "🔥 Intense Speech (Freedom & Justice)": (
+        "Write a powerful, impassioned speech from a rebel leader rallying their people against a "
+        "tyrant. The speech should be urgent, filled with conviction, and call for freedom and justice, "
+        "making sure the emotional intensity is evident in the phrasing."
+    ),
+    "👻 Mysterious Horror Scene (Haunted Lighthouse)": (
+        "Describe a chilling ghostly encounter in an abandoned lighthouse on a foggy night. The "
+        "protagonist, alone and cold, hears whispers from the shadows, telling them secrets they were "
+        "never meant to know. Use language that builds suspense and tension, ensuring it sounds "
+        "haunting and engaging."
+    ),
+}

src/integrations/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 from .anthropic_api import generate_text_with_claude, AnthropicError
 from .elevenlabs_api import text_to_speech_with_elevenlabs, ElevenLabsError
-from .hume_api import text_to_speech_with_hume, get_random_hume_voice_names, HumeError

 from .anthropic_api import generate_text_with_claude, AnthropicError
 from .elevenlabs_api import text_to_speech_with_elevenlabs, ElevenLabsError
+from .hume_api import text_to_speech_with_hume, get_random_hume_voice_names, HumeError

src/integrations/anthropic_api.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """
 anthropic_api.py
-This file defines the interaction with the Anthropic API, focusing on generating text using the Claude model.
 It includes functionality for input validation, API request handling, and processing API responses.
 Key Features:
@@ -36,8 +36,9 @@ from src.utils import truncate_text, validate_env_var
 @dataclass(frozen=True)
 class AnthropicConfig:
     """Immutable configuration for interacting with the Anthropic API."""
-    api_key: str = validate_env_var('ANTHROPIC_API_KEY')
-    model: ModelParam = 'claude-3-5-sonnet-latest'
     max_tokens: int = 256
     system_prompt: str = f"""You are an imaginative and articulate assistant, skilled in generating creative, concise, and engaging content that is perfectly suited for expressive speech synthesis.
@@ -60,13 +61,13 @@ The generated text will be directly fed into TTS APIs, so avoid ambiguity, and a
     def __post_init__(self):
         # Validate that required attributes are set
         if not self.api_key:
-            raise ValueError('Anthropic API key is not set.')
         if not self.model:
-            raise ValueError('Anthropic Model is not set.')
         if not self.max_tokens:
-            raise ValueError('Anthropic Max Tokens is not set.')
         if not self.system_prompt:
-            raise ValueError('Anthropic System Prompt is not set.')
     @property
     def client(self) -> Anthropic:
@@ -81,6 +82,7 @@ The generated text will be directly fed into TTS APIs, so avoid ambiguity, and a
 class AnthropicError(Exception):
     """Custom exception for errors related to the Anthropic API."""
     def __init__(self, message: str, original_exception: Optional[Exception] = None):
         super().__init__(message)
         self.original_exception = original_exception
@@ -95,7 +97,7 @@ anthropic_config = AnthropicConfig()
     wait=wait_fixed(2),
     before=before_log(logger, logging.DEBUG),
     after=after_log(logger, logging.DEBUG),
-    reraise=True
 )
 def generate_text_with_claude(prompt: str) -> str:
     """
@@ -110,7 +112,9 @@ def generate_text_with_claude(prompt: str) -> str:
     Raises:
         AnthropicError: If there is an error communicating with the Anthropic API.
     """
-    logger.debug(f'Generating text with Claude. Prompt length: {len(prompt)} characters.')
     response = None
     try:
@@ -119,36 +123,40 @@ def generate_text_with_claude(prompt: str) -> str:
             model=anthropic_config.model,
             max_tokens=anthropic_config.max_tokens,
             system=anthropic_config.system_prompt,
-            messages=[{'role': 'user', 'content': prompt}],
         )
-        logger.debug(f'API response received: {truncate_text(str(response))}')
         # Validate response
-        if not hasattr(response, 'content'):
             logger.error("Response is missing 'content'. Response: %s", response)
             raise AnthropicError('Invalid API response: Missing "content".')
         # Process response
         blocks: Union[List[TextBlock], TextBlock, None] = response.content
         if isinstance(blocks, list):
-            result = '\n\n'.join(block.text for block in blocks if isinstance(block, TextBlock))
-            logger.debug(f'Processed response from list: {truncate_text(result)}')
             return result
         if isinstance(blocks, TextBlock):
-            logger.debug(f'Processed response from single TextBlock: {truncate_text(blocks.text)}')
             return blocks.text
-        logger.warning(f'Unexpected response type: {type(blocks)}')
-        return str(blocks or 'No content generated.')
     except Exception as e:
-        logger.exception(f'Error generating text with Anthropic: {e}')
         raise AnthropicError(
             message=(
-                f'Error generating text with Anthropic: {e}. '
                 f'HTTP Status: {getattr(response, "status", "N/A")}. '
-                f'Prompt (truncated): {truncate_text(prompt)}. '
-                f'Model: {anthropic_config.model}, Max tokens: {anthropic_config.max_tokens}'
             ),
             original_exception=e,
-        )

 """
 anthropic_api.py
+This file defines the interaction with the Anthropic API, focusing on generating text using the Claude model.
 It includes functionality for input validation, API request handling, and processing API responses.
 Key Features:
 @dataclass(frozen=True)
 class AnthropicConfig:
     """Immutable configuration for interacting with the Anthropic API."""
+    api_key: str = validate_env_var("ANTHROPIC_API_KEY")
+    model: ModelParam = "claude-3-5-sonnet-latest"
     max_tokens: int = 256
     system_prompt: str = f"""You are an imaginative and articulate assistant, skilled in generating creative, concise, and engaging content that is perfectly suited for expressive speech synthesis.
     def __post_init__(self):
         # Validate that required attributes are set
         if not self.api_key:
+            raise ValueError("Anthropic API key is not set.")
         if not self.model:
+            raise ValueError("Anthropic Model is not set.")
         if not self.max_tokens:
+            raise ValueError("Anthropic Max Tokens is not set.")
         if not self.system_prompt:
+            raise ValueError("Anthropic System Prompt is not set.")
     @property
     def client(self) -> Anthropic:
 class AnthropicError(Exception):
     """Custom exception for errors related to the Anthropic API."""
     def __init__(self, message: str, original_exception: Optional[Exception] = None):
         super().__init__(message)
         self.original_exception = original_exception
     wait=wait_fixed(2),
     before=before_log(logger, logging.DEBUG),
     after=after_log(logger, logging.DEBUG),
+    reraise=True,
 )
 def generate_text_with_claude(prompt: str) -> str:
     """
     Raises:
         AnthropicError: If there is an error communicating with the Anthropic API.
     """
+    logger.debug(
+        f"Generating text with Claude. Prompt length: {len(prompt)} characters."
+    )
     response = None
     try:
             model=anthropic_config.model,
             max_tokens=anthropic_config.max_tokens,
             system=anthropic_config.system_prompt,
+            messages=[{"role": "user", "content": prompt}],
         )
+        logger.debug(f"API response received: {truncate_text(str(response))}")
         # Validate response
+        if not hasattr(response, "content"):
             logger.error("Response is missing 'content'. Response: %s", response)
             raise AnthropicError('Invalid API response: Missing "content".')
         # Process response
         blocks: Union[List[TextBlock], TextBlock, None] = response.content
         if isinstance(blocks, list):
+            result = "\n\n".join(
+                block.text for block in blocks if isinstance(block, TextBlock)
+            )
+            logger.debug(f"Processed response from list: {truncate_text(result)}")
             return result
         if isinstance(blocks, TextBlock):
+            logger.debug(
+                f"Processed response from single TextBlock: {truncate_text(blocks.text)}"
+            )
             return blocks.text
+        logger.warning(f"Unexpected response type: {type(blocks)}")
+        return str(blocks or "No content generated.")
     except Exception as e:
+        logger.exception(f"Error generating text with Anthropic: {e}")
         raise AnthropicError(
             message=(
+                f"Error generating text with Anthropic: {e}. "
                 f'HTTP Status: {getattr(response, "status", "N/A")}. '
+                f"Prompt (truncated): {truncate_text(prompt)}. "
+                f"Model: {anthropic_config.model}, Max tokens: {anthropic_config.max_tokens}"
             ),
             original_exception=e,
+        )

src/integrations/elevenlabs_api.py CHANGED Viewed

@@ -35,13 +35,14 @@ from src.config import logger
 from src.utils import validate_env_var
-ElevenlabsVoiceName = Literal['Adam', 'Antoni', 'Rachel', 'Matilda']
 class ElevenLabsVoice(Enum):
-    ADAM = ('Adam', 'pNInz6obpgDQGcFmaJgB')
-    ANTONI = ('Antoni', 'ErXwobaYiN019PkySvjV')
-    RACHEL = ('Rachel', '21m00Tcm4TlvDq8ikWAM')
-    MATILDA = ('Matilda', 'XrExE9yKIg1WjnnlVkGX')
     @property
     def voice_name(self) -> ElevenlabsVoiceName:
@@ -57,19 +58,22 @@ class ElevenLabsVoice(Enum):
 @dataclass(frozen=True)
 class ElevenLabsConfig:
     """Immutable configuration for interacting with the ElevenLabs TTS API."""
-    api_key: str = validate_env_var('ELEVENLABS_API_KEY')
-    model_id: str = 'eleven_multilingual_v2'  # ElevenLab's most emotionally expressive model
-    output_format: str = 'mp3_44100_128'  # Output format of the generated audio
     def __post_init__(self):
         # Validate that required attributes are set
         if not self.api_key:
-            raise ValueError('ElevenLabs API key is not set.')
         if not self.model_id:
-            raise ValueError('ElevenLabs Model ID is not set.')
         if not self.output_format:
-            raise ValueError('ElevenLabs Output Format is not set.')
     @property
     def client(self) -> ElevenLabs:
         """
@@ -93,6 +97,7 @@ class ElevenLabsConfig:
 class ElevenLabsError(Exception):
     """Custom exception for errors related to the ElevenLabs TTS API."""
     def __init__(self, message: str, original_exception: Optional[Exception] = None):
         super().__init__(message)
         self.original_exception = original_exception
@@ -107,7 +112,7 @@ elevenlabs_config = ElevenLabsConfig()
     wait=wait_fixed(2),
     before=before_log(logger, logging.DEBUG),
     after=after_log(logger, logging.DEBUG),
-    reraise=True
 )
 def text_to_speech_with_elevenlabs(text: str) -> Tuple[ElevenlabsVoiceName, bytes]:
     """
@@ -123,7 +128,9 @@ def text_to_speech_with_elevenlabs(text: str) -> Tuple[ElevenlabsVoiceName, byte
     Raises:
         ElevenLabsError: If there is an error communicating with the ElevenLabs API or processing the response.
     """
-    logger.debug(f'Synthesizing speech from text with ElevenLabs. Text length: {len(text)} characters.')
     # Get a random voice as an enum member.
     voice = elevenlabs_config.random_voice
@@ -141,22 +148,24 @@ def text_to_speech_with_elevenlabs(text: str) -> Tuple[ElevenlabsVoiceName, byte
         # Attempt to combine chunks into a single bytes object.
         # If audio_iterator is not iterable or invalid, an exception will be raised.
         try:
-            audio = b''.join(chunk for chunk in audio_iterator)
         except Exception as iter_error:
-            logger.error('Invalid audio iterator response.')
-            raise ElevenLabsError('Invalid audio iterator received from ElevenLabs API.') from iter_error
         # Validate audio
         if not audio:
-            logger.error('No audio data received from ElevenLabs API.')
-            raise ElevenLabsError('Empty audio data received from ElevenLabs API.')
-        logger.info(f'Received ElevenLabs audio ({len(audio)} bytes).')
         return voice.voice_name, audio
     except Exception as e:
-        logger.exception(f'Error synthesizing speech from text with Elevenlabs: {e}')
         raise ElevenLabsError(
-            message=f'Failed to synthesize speech from text with ElevenLabs: {e}',
             original_exception=e,
-        )

 from src.utils import validate_env_var
+ElevenlabsVoiceName = Literal["Adam", "Antoni", "Rachel", "Matilda"]
 class ElevenLabsVoice(Enum):
+    ADAM = ("Adam", "pNInz6obpgDQGcFmaJgB")
+    ANTONI = ("Antoni", "ErXwobaYiN019PkySvjV")
+    RACHEL = ("Rachel", "21m00Tcm4TlvDq8ikWAM")
+    MATILDA = ("Matilda", "XrExE9yKIg1WjnnlVkGX")
     @property
     def voice_name(self) -> ElevenlabsVoiceName:
 @dataclass(frozen=True)
 class ElevenLabsConfig:
     """Immutable configuration for interacting with the ElevenLabs TTS API."""
+    api_key: str = validate_env_var("ELEVENLABS_API_KEY")
+    model_id: str = (
+        "eleven_multilingual_v2"  # ElevenLabs' most emotionally expressive model
+    )
+    output_format: str = "mp3_44100_128"  # Output format of the generated audio
     def __post_init__(self):
         # Validate that required attributes are set
         if not self.api_key:
+            raise ValueError("ElevenLabs API key is not set.")
         if not self.model_id:
+            raise ValueError("ElevenLabs Model ID is not set.")
         if not self.output_format:
+            raise ValueError("ElevenLabs Output Format is not set.")
     @property
     def client(self) -> ElevenLabs:
         """
 class ElevenLabsError(Exception):
     """Custom exception for errors related to the ElevenLabs TTS API."""
     def __init__(self, message: str, original_exception: Optional[Exception] = None):
         super().__init__(message)
         self.original_exception = original_exception
     wait=wait_fixed(2),
     before=before_log(logger, logging.DEBUG),
     after=after_log(logger, logging.DEBUG),
+    reraise=True,
 )
 def text_to_speech_with_elevenlabs(text: str) -> Tuple[ElevenlabsVoiceName, bytes]:
     """
     Raises:
         ElevenLabsError: If there is an error communicating with the ElevenLabs API or processing the response.
     """
+    logger.debug(
+        f"Synthesizing speech from text with ElevenLabs. Text length: {len(text)} characters."
+    )
     # Get a random voice as an enum member.
     voice = elevenlabs_config.random_voice
         # Attempt to combine chunks into a single bytes object.
         # If audio_iterator is not iterable or invalid, an exception will be raised.
         try:
+            audio = b"".join(chunk for chunk in audio_iterator)
         except Exception as iter_error:
+            logger.error("Invalid audio iterator response.")
+            raise ElevenLabsError(
+                "Invalid audio iterator received from ElevenLabs API."
+            ) from iter_error
         # Validate audio
         if not audio:
+            logger.error("No audio data received from ElevenLabs API.")
+            raise ElevenLabsError("Empty audio data received from ElevenLabs API.")
+        logger.info(f"Received ElevenLabs audio ({len(audio)} bytes).")
         return voice.voice_name, audio
     except Exception as e:
+        logger.exception(f"Error synthesizing speech from text with Elevenlabs: {e}")
         raise ElevenLabsError(
+            message=f"Failed to synthesize speech from text with ElevenLabs: {e}",
             original_exception=e,
+        )

src/integrations/hume_api.py CHANGED Viewed

@@ -33,37 +33,44 @@ from src.config import logger
 from src.utils import validate_env_var, truncate_text
-HumeVoiceName = Literal['ITO', 'KORA', 'STELLA', 'DACHER']
 @dataclass(frozen=True)
 class HumeConfig:
     """Immutable configuration for interacting with the Hume TTS API."""
-    api_key: str = validate_env_var('HUME_API_KEY')
-    tts_endpoint_url: str = 'https://api.hume.ai/v0/tts'
-    voice_names: List[HumeVoiceName] = ('ITO', 'KORA', 'STELLA', 'DACHER')
-    audio_format: str = 'wav'
     headers: dict = None
     def __post_init__(self):
         # Validate required attributes
         if not self.api_key:
-            raise ValueError('Hume API key is not set.')
         if not self.tts_endpoint_url:
-            raise ValueError('Hume TTS endpoint URL is not set.')
         if not self.voice_names:
-            raise ValueError('Hume voice names list is not set.')
         if not self.audio_format:
-            raise ValueError('Hume audio format is not set.')
         # Set headers dynamically after validation
-        object.__setattr__(self, 'headers', {
-            'X-Hume-Api-Key': f'{self.api_key}',
-            'Content-Type': 'application/json',
-        })
 class HumeError(Exception):
     """Custom exception for errors related to the Hume TTS API."""
     def __init__(self, message: str, original_exception: Optional[Exception] = None):
         super().__init__(message)
         self.original_exception = original_exception
@@ -78,9 +85,11 @@ hume_config = HumeConfig()
     wait=wait_fixed(2),
     before=before_log(logger, logging.DEBUG),
     after=after_log(logger, logging.DEBUG),
-    reraise=True
 )
-def text_to_speech_with_hume(prompt: str, text: str, voice_name: HumeVoiceName) -> bytes:
     """
     Synthesizes text to speech using the Hume TTS API and processes raw binary audio data.
@@ -96,13 +105,13 @@ def text_to_speech_with_hume(prompt: str, text: str, voice_name: HumeVoiceName)
     Raises:
         HumeError: If there is an error communicating with the Hume TTS API.
     """
-    logger.debug(f'Processing TTS with Hume. Prompt length: {len(prompt)} characters. Text length: {len(text)} characters.')
     request_body = {
-        'text': text,
-        'voice': {
-            'name': voice_name
-        },
     }
     try:
@@ -115,26 +124,33 @@ def text_to_speech_with_hume(prompt: str, text: str, voice_name: HumeVoiceName)
         # Validate response
         if response.status_code != 200:
-            logger.error(f'Hume TTS API Error: {response.status_code} - {response.text[:200]}... (truncated)')
-            raise HumeError(f'Hume TTS API responded with status {response.status_code}: {response.text[:200]}')
         # Process response audio
-        if response.headers.get('Content-Type', '').startswith('audio/'):
             audio = response.content  # Raw binary audio data
-            logger.info(f'Received audio data from Hume ({len(audio)} bytes).')
             return voice_name, audio
-        raise HumeError(f'Unexpected Content-Type: {response.headers.get("Content-Type", "Unknown")}')
     except Exception as e:
-        logger.exception(f'Error synthesizing speech from text with Hume: {e}')
         raise HumeError(
-            message=f'Failed to synthesize speech from text with Hume: {e}',
             original_exception=e,
         )
 def get_random_hume_voice_names() -> Tuple[HumeVoiceName, HumeVoiceName]:
-    """
     Get two random Hume voice names.
     Voices:
@@ -143,4 +159,4 @@ def get_random_hume_voice_names() -> Tuple[HumeVoiceName, HumeVoiceName]:
         - STELLA
         - DACHER
     """
-    return tuple(random.sample(hume_config.voice_names, 2))

 from src.utils import validate_env_var, truncate_text
+HumeVoiceName = Literal["ITO", "KORA", "STELLA", "DACHER"]
 @dataclass(frozen=True)
 class HumeConfig:
     """Immutable configuration for interacting with the Hume TTS API."""
+    # Default is computed once, when this class body is evaluated.
+    api_key: str = validate_env_var("HUME_API_KEY")
+    tts_endpoint_url: str = "https://api.hume.ai/v0/tts"
+    # Tuple default: immutable, and a sequence that random.sample() accepts.
+    voice_names: Tuple[HumeVoiceName, ...] = ("ITO", "KORA", "STELLA", "DACHER")
+    audio_format: str = "wav"
+    # Placeholder only; __post_init__ replaces it with the real request headers.
     headers: dict = None
     def __post_init__(self):
         # Validate required attributes
         if not self.api_key:
+            raise ValueError("Hume API key is not set.")
         if not self.tts_endpoint_url:
+            raise ValueError("Hume TTS endpoint URL is not set.")
         if not self.voice_names:
+            raise ValueError("Hume voice names list is not set.")
         if not self.audio_format:
+            raise ValueError("Hume audio format is not set.")
         # Set headers dynamically after validation
+        # (frozen=True blocks plain assignment, hence object.__setattr__).
+        object.__setattr__(
+            self,
+            "headers",
+            {
+                "X-Hume-Api-Key": f"{self.api_key}",
+                "Content-Type": "application/json",
+            },
+        )
 class HumeError(Exception):
     """Custom exception for errors related to the Hume TTS API."""
     def __init__(self, message: str, original_exception: Optional[Exception] = None):
         super().__init__(message)
         self.original_exception = original_exception
     wait=wait_fixed(2),
     before=before_log(logger, logging.DEBUG),
     after=after_log(logger, logging.DEBUG),
+    reraise=True,
 )
+def text_to_speech_with_hume(
+    prompt: str, text: str, voice_name: HumeVoiceName
+) -> Tuple[HumeVoiceName, bytes]:
     """
     Synthesizes text to speech using the Hume TTS API and processes raw binary audio data.
     Raises:
         HumeError: If there is an error communicating with the Hume TTS API.
     """
+    logger.debug(
+        f"Processing TTS with Hume. Prompt length: {len(prompt)} characters. Text length: {len(text)} characters."
+    )
     request_body = {
+        "text": text,
+        "voice": {"name": voice_name},
     }
     try:
         # Validate response
         if response.status_code != 200:
+            logger.error(
+                f"Hume TTS API Error: {response.status_code} - {response.text[:200]}... (truncated)"
+            )
+            raise HumeError(
+                f"Hume TTS API responded with status {response.status_code}: {response.text[:200]}"
+            )
         # Process response audio
+        if response.headers.get("Content-Type", "").startswith("audio/"):
             audio = response.content  # Raw binary audio data
+            logger.info(f"Received audio data from Hume ({len(audio)} bytes).")
             return voice_name, audio
+        raise HumeError(
+            f'Unexpected Content-Type: {response.headers.get("Content-Type", "Unknown")}'
+        )
     except Exception as e:
+        logger.exception(f"Error synthesizing speech from text with Hume: {e}")
         raise HumeError(
+            message=f"Failed to synthesize speech from text with Hume: {e}",
             original_exception=e,
         )
 def get_random_hume_voice_names() -> Tuple[HumeVoiceName, HumeVoiceName]:
+    """
     Get two random Hume voice names.
     Voices:
         - STELLA
         - DACHER
     """
+    return tuple(random.sample(hume_config.voice_names, 2))

src/theme.py CHANGED Viewed

@@ -14,27 +14,28 @@ from collections.abc import Iterable
 from gradio.themes.base import Base
 from gradio.themes.utils import colors, fonts, sizes
 class CustomTheme(Base):
     def __init__(
         self,
         *,
-        primary_hue: colors.Color | str = colors.purple,
-        secondary_hue: colors.Color | str = colors.stone,
-        neutral_hue: colors.Color | str = colors.neutral,
         spacing_size: sizes.Size | str = sizes.spacing_md,
         radius_size: sizes.Size | str = sizes.radius_md,
         text_size: sizes.Size | str = sizes.text_md,
         font: fonts.Font | str | Iterable[fonts.Font | str] = (
-            fonts.GoogleFont('Source Sans Pro'),
-            'ui-sans-serif',
-            'system-ui',
-            'sans-serif',
         ),
         font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
-            fonts.GoogleFont('IBM Plex Mono'),
-            'ui-monospace',
-            'Consolas',
-            'monospace',
         ),
     ):
         super().__init__(
@@ -47,56 +48,49 @@ class CustomTheme(Base):
             font=font,
             font_mono=font_mono,
         )
-        self.name = 'custom_theme'
         super().set(
             # --- Colors ---
-            error_background_fill='#EF4444',
             # error_background_fill_dark='',
-            error_border_color='#B91C1C',
             # error_border_color_dark='',
-            error_icon_color='#B91C1C',
             # error_icon_color_dark='',
-            input_background_fill='#F9FAFB',
             # input_background_fill_dark='',
             # --- Shadows ---
-            input_shadow_focus='0 0 0 *shadow_spread #7C3AED80, *shadow_inset',
             # input_shadow_focus_dark='',
             # --- Gradients ---
-            stat_background_fill='linear-gradient(to right, #7C3AED, #D8B4FE)',
             # stat_background_fill_dark='',
             # --- Button borders ---
-            button_border_width='0px',
-            input_border_width='1px',
             # --- Primary Button ---
-            button_primary_background_fill='#7E22CE',
             # button_primary_background_fill_dark='',
-            button_primary_background_fill_hover='#9333EA',
             # button_primary_background_fill_hover_dark='',
-            button_primary_text_color='#FFFFFF',
             # button_primary_text_color_dark='',
             # --- Secondary Button ---
-            button_secondary_background_fill='#222222',
             # button_secondary_background_fill_dark='#4B5563',
-            button_secondary_background_fill_hover='#3F3F3F',
             # button_secondary_background_fill_hover_dark='#374151',
-            button_secondary_text_color='#FFFFFF',
             # button_secondary_text_color_dark='#FFFFFF',
             # --- Cancel Button ---
-            button_cancel_background_fill='#EF4444',
             # button_cancel_background_fill_dark='#B91C1C',
-            button_cancel_background_fill_hover='#DC2626',
             # button_cancel_background_fill_hover_dark='#991B1B',
-            button_cancel_text_color='#FFFFFF',
             # button_cancel_text_color_dark='#FFFFFF',
-            button_cancel_text_color_hover='#FFFFFF',
             # button_cancel_text_color_hover_dark='#FFFFFF',
             # --- Other ---
-            border_color_accent_subdued='#A78BFA',
-        )

 from gradio.themes.base import Base
 from gradio.themes.utils import colors, fonts, sizes
 class CustomTheme(Base):
     def __init__(
         self,
         *,
+        primary_hue: colors.Color | str = colors.purple,
+        secondary_hue: colors.Color | str = colors.stone,
+        neutral_hue: colors.Color | str = colors.neutral,
         spacing_size: sizes.Size | str = sizes.spacing_md,
         radius_size: sizes.Size | str = sizes.radius_md,
         text_size: sizes.Size | str = sizes.text_md,
         font: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("Source Sans Pro"),
+            "ui-sans-serif",
+            "system-ui",
+            "sans-serif",
         ),
         font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("IBM Plex Mono"),
+            "ui-monospace",
+            "Consolas",
+            "monospace",
         ),
     ):
         super().__init__(
             font=font,
             font_mono=font_mono,
         )
+        self.name = "custom_theme"
         super().set(
             # --- Colors ---
+            error_background_fill="#EF4444",
             # error_background_fill_dark='',
+            error_border_color="#B91C1C",
             # error_border_color_dark='',
+            error_icon_color="#B91C1C",
             # error_icon_color_dark='',
+            input_background_fill="#F9FAFB",
             # input_background_fill_dark='',
             # --- Shadows ---
+            input_shadow_focus="0 0 0 *shadow_spread #7C3AED80, *shadow_inset",
             # input_shadow_focus_dark='',
             # --- Gradients ---
+            stat_background_fill="linear-gradient(to right, #7C3AED, #D8B4FE)",
             # stat_background_fill_dark='',
             # --- Button borders ---
+            button_border_width="0px",
+            input_border_width="1px",
             # --- Primary Button ---
+            button_primary_background_fill="#7E22CE",
             # button_primary_background_fill_dark='',
+            button_primary_background_fill_hover="#9333EA",
             # button_primary_background_fill_hover_dark='',
+            button_primary_text_color="#FFFFFF",
             # button_primary_text_color_dark='',
             # --- Secondary Button ---
+            button_secondary_background_fill="#222222",
             # button_secondary_background_fill_dark='#4B5563',
+            button_secondary_background_fill_hover="#3F3F3F",
             # button_secondary_background_fill_hover_dark='#374151',
+            button_secondary_text_color="#FFFFFF",
             # button_secondary_text_color_dark='#FFFFFF',
             # --- Cancel Button ---
+            button_cancel_background_fill="#EF4444",
             # button_cancel_background_fill_dark='#B91C1C',
+            button_cancel_background_fill_hover="#DC2626",
             # button_cancel_background_fill_hover_dark='#991B1B',
+            button_cancel_text_color="#FFFFFF",
             # button_cancel_text_color_dark='#FFFFFF',
+            button_cancel_text_color_hover="#FFFFFF",
             # button_cancel_text_color_hover_dark='#FFFFFF',
             # --- Other ---
+            border_color_accent_subdued="#A78BFA",
+        )

src/types.py CHANGED Viewed

@@ -9,7 +9,7 @@ has a consistent structure including both the provider and the associated voice.
 from typing import TypedDict, Literal, Dict
-TTSProviderName = Literal['Hume AI', 'ElevenLabs', 'Unknown']
 """TTSProviderName represents the allowed provider names for TTS services."""
@@ -21,14 +21,15 @@ class OptionDetails(TypedDict):
         provider (TTSProviderName): The name of the provider (either 'Hume AI' or 'ElevenLabs').
         voice (str): The name of the voice associated with the option.
     """
     provider: TTSProviderName
     voice: str
-OptionKey = Literal['Option A', 'Option B']
 """OptionKey is restricted to the literal values 'Option A' or 'Option B'."""
 OptionMap = Dict[OptionKey, OptionDetails]
 """OptionMap defines the structure of the options mapping, where each key is an OptionKey
-and the value is an OptionDetails dictionary."""

 from typing import TypedDict, Literal, Dict
+TTSProviderName = Literal["Hume AI", "ElevenLabs", "Unknown"]
 """TTSProviderName represents the allowed provider names for TTS services."""
         provider (TTSProviderName): The name of the provider (either 'Hume AI' or 'ElevenLabs').
         voice (str): The name of the voice associated with the option.
     """
     provider: TTSProviderName
     voice: str
+OptionKey = Literal["Option A", "Option B"]
 """OptionKey is restricted to the literal values 'Option A' or 'Option B'."""
 OptionMap = Dict[OptionKey, OptionDetails]
 """OptionMap defines the structure of the options mapping, where each key is an OptionKey
+and the value is an OptionDetails dictionary."""

src/utils.py CHANGED Viewed

@@ -37,14 +37,14 @@ def truncate_text(text: str, max_length: int = 50) -> str:
         ''
     """
     if max_length <= 0:
-        logger.warning(f'Invalid max_length={max_length}. Returning empty string.')
-        return ''
     is_truncated = len(text) > max_length
     if is_truncated:
-        logger.debug(f'Truncated text to {max_length} characters.')
-    return text[:max_length] + ('...' if is_truncated else '')
 def validate_env_var(var_name: str) -> str:
@@ -59,7 +59,7 @@ def validate_env_var(var_name: str) -> str:
     Raises:
         ValueError: If the environment variable is not set.
     Examples:
         >>> import os
         >>> os.environ["EXAMPLE_VAR"] = "example_value"
@@ -71,9 +71,11 @@ def validate_env_var(var_name: str) -> str:
           ...
         ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
     """
-    value = os.environ.get(var_name, '')
     if not value:
-        raise ValueError(f'{var_name} is not set. Please ensure it is defined in your environment variables.')
     return value
@@ -98,17 +100,19 @@ def validate_prompt_length(prompt: str, max_length: int, min_length: int) -> Non
     """
     stripped_prompt = prompt.strip()
     prompt_length = len(stripped_prompt)
-    logger.debug(f'Prompt length being validated: {prompt_length} characters')
     if prompt_length < min_length:
         raise ValueError(
-            f'Your prompt is too short. Please enter at least {min_length} characters. '
-            f'(Current length: {prompt_length})'
         )
     if prompt_length > max_length:
         raise ValueError(
-            f'Your prompt is too long. Please limit it to {max_length} characters. '
-            f'(Current length: {prompt_length})'
         )
-    logger.debug(f'Prompt length validation passed for prompt: {truncate_text(stripped_prompt)}')

         ''
     """
     if max_length <= 0:
+        logger.warning(f"Invalid max_length={max_length}. Returning empty string.")
+        return ""
     is_truncated = len(text) > max_length
     if is_truncated:
+        logger.debug(f"Truncated text to {max_length} characters.")
+    return text[:max_length] + ("..." if is_truncated else "")
 def validate_env_var(var_name: str) -> str:
     Raises:
         ValueError: If the environment variable is not set.
     Examples:
         >>> import os
         >>> os.environ["EXAMPLE_VAR"] = "example_value"
           ...
         ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
     """
+    value = os.environ.get(var_name, "")
     if not value:
+        raise ValueError(
+            f"{var_name} is not set. Please ensure it is defined in your environment variables."
+        )
     return value
     """
     stripped_prompt = prompt.strip()
     prompt_length = len(stripped_prompt)
+    logger.debug(f"Prompt length being validated: {prompt_length} characters")
     if prompt_length < min_length:
         raise ValueError(
+            f"Your prompt is too short. Please enter at least {min_length} characters. "
+            f"(Current length: {prompt_length})"
         )
     if prompt_length > max_length:
         raise ValueError(
+            f"Your prompt is too long. Please limit it to {max_length} characters. "
+            f"(Current length: {prompt_length})"
         )
+    logger.debug(
+        f"Prompt length validation passed for prompt: {truncate_text(stripped_prompt)}"
+    )