# Code copied from https://elevenlabs.io/docs/cookbooks/text-to-speech/streaming import base64 from io import BytesIO from typing import IO import yaml from elevenlabs import VoiceSettings from elevenlabs.client import ElevenLabs from hackathon.config import settings client = ElevenLabs(api_key=settings.ELEVENLABS_API_KEY) voices = {"politician1": "ohZqJahxofk8dkPKmd9F", "politician2": "v7sy7EHXxN3ToffFQfvr"} # voice_id: "ohZqJahxofk8dkPKmd9F" # Another voice just in case def read_audio_config(yaml_path: str) -> dict: try: with open(yaml_path, "r") as file: config = yaml.safe_load(file) return config except FileNotFoundError: raise FileNotFoundError(f"The file at path '{yaml_path}' does not exist.") except yaml.YAMLError as e: raise ValueError(f"Error parsing YAML file: {e}") def read_audio_file(audio_path: str): with open(audio_path, "rb") as audio_file: audio_base64 = base64.b64encode(audio_file.read()).decode("utf-8") return audio_base64 def text_to_speech_file( text: str, voice_id: str, stability=0.5, similarity=1.0, style=0.3, base_path="audio_store", ) -> str: """voice: politician1 or politician2""" # Calling the text_to_speech conversion API with detailed parameters response = client.text_to_speech.convert( voice_id=voice_id, # Adam pre-made voice output_format="mp3_44100_32", text=text, model_id="eleven_turbo_v2_5", # use the turbo model for low latency voice_settings=VoiceSettings( stability=0.5, similarity_boost=1.0, style=0.3, use_speaker_boost=True, ), ) audio_data = BytesIO() for chunk in response: if chunk: audio_data.write(chunk) audio_data.seek(0) audio_base64 = base64.b64encode(audio_data.read()).decode("utf-8") return audio_base64 def text_to_speech_stream( text: str, voice: str, stability=0.5, similarity=1.0, style=0.3 ) -> IO[bytes]: """voice: politician1 or politician2""" # Perform the text-to-speech conversion response = client.text_to_speech.convert( voice_id=voices[voice], # Adam pre-made voice output_format="mp3_22050_32", text=text, model_id="eleven_multilingual_v2", voice_settings=VoiceSettings( stability=0.0, similarity_boost=1.0, style=0.0, use_speaker_boost=True, ), ) # Create a BytesIO object to hold the audio data in memory audio_stream = BytesIO() # Write each chunk of audio data to the stream for chunk in response: if chunk: audio_stream.write(chunk) # Reset stream position to the beginning audio_stream.seek(0) # Return the stream for further use return audio_stream