Spaces:

SarwarShafee
/

ERP-system

Runtime error

App Files Files Community

ERP-system / erp_core /asr_and_tts.py

sarwarshafee8709809365

readme updated

004b22a 8 months ago

raw

history blame contribute delete

2.3 kB

	import os
	# from dotenv import load_dotenv
	import tempfile
	import scipy.io.wavfile as wavfile
	from openai import OpenAI
	# from elevenlabs import ElevenLabs, VoiceSettings, play, stream

	# Credentials are read from the process environment (.env loading is disabled).
	# load_dotenv(override=True)
	openai_api_key = os.environ.get("OPENAI_API_KEY")
	# elevenlabs_api_key = os.getenv('ELEVENLABS_API_KEY')

	# Module-level OpenAI client used by transcribe(); built with library defaults.
	openai_client = OpenAI()
	# elevenlabs_client = ElevenLabs(api_key=elevenlabs_api_key)

	# Function to transcribe audio using OpenAI Whisper API
	def transcribe(audio):
	    """Transcribe recorded audio to text with the OpenAI ``whisper-1`` model.

	    Args:
	        audio: ``None``, or a ``(sample_rate, audio_data)`` pair that is
	            passed to ``scipy.io.wavfile.write``.

	    Returns:
	        The transcription text. When ``audio`` is ``None`` the tuple
	        ``("No audio provided.", None)`` is returned instead; that shape is
	        kept unchanged for backward compatibility with existing callers.
	    """
	    if audio is None:
	        return "No audio provided.", None

	    sample_rate, audio_data = audio

	    # Reserve a unique .wav path, then close the handle before writing so the
	    # file is never open twice (some platforms refuse a second open).
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
	        temp_file_path = temp_file.name

	    try:
	        wavfile.write(temp_file_path, sample_rate, audio_data)

	        with open(temp_file_path, "rb") as audio_file:
	            transcription_response = openai_client.audio.transcriptions.create(
	                model="whisper-1",
	                file=audio_file,
	            )
	    finally:
	        # delete=False means nothing else removes the file; clean it up here
	        # so repeated calls do not fill the temp directory.
	        try:
	            os.remove(temp_file_path)
	        except OSError:
	            # Best-effort cleanup: never mask the transcription result or
	            # the original exception with a failed delete.
	            pass

	    return transcription_response.text

	def tts(response_text):
	    """Convert ``response_text`` to speech and return the audio file path.

	    The text is sent to the OpenAI speech endpoint (model ``tts-1``, voice
	    ``onyx``) and the result is written to the relative path ``output.mp3``,
	    which is also the return value.

	    An ElevenLabs-based variant and a delete-before-write step previously
	    lived here only as commented-out code and have been dropped.
	    """
	    output_path = "output.mp3"

	    # A fresh client is created per call, as before.
	    client = OpenAI()
	    speech = client.audio.speech.create(
	        input=response_text,
	        model="tts-1",
	        voice="onyx",
	    )

	    speech.stream_to_file(output_path)
	    return output_path