updated
Browse files- Dockerfile +23 -0
- app.py +53 -0
- requirements.txt +6 -0
- src/__init__.py +0 -0
- src/__pycache__/__init__.cpython-312.pyc +0 -0
- src/core/__init__.py +0 -0
- src/core/__pycache__/__init__.cpython-312.pyc +0 -0
- src/core/__pycache__/speechtotext.cpython-312.pyc +0 -0
- src/core/__pycache__/texttospeech.cpython-312.pyc +0 -0
- src/core/__pycache__/texttotext.cpython-312.pyc +0 -0
- src/core/speechtotext.py +24 -0
- src/core/texttospeech.py +42 -0
- src/core/texttotext.py +45 -0
Dockerfile
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Use an official Python runtime as a base image
FROM python:3.11-slim

# Set environment variables
# PYTHONUNBUFFERED=1 makes stdout/stderr unbuffered so container logs appear in real time
ENV PYTHONUNBUFFERED=1

# Create and set the working directory
WORKDIR /app

# Copy the requirements.txt file into the container at /app
# (copied before the source so the dependency layer is cached across code-only rebuilds)
COPY requirements.txt /app/

# Install any needed dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . /app/

# Expose the port FastAPI will run on
EXPOSE 7860

# Run the application using uvicorn
# (app.py's __main__ block starts uvicorn itself on port 7860)
CMD ["python", "app.py"]
|
app.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, WebSocket,WebSocketDisconnect
|
2 |
+
import asyncio
|
3 |
+
import base64
|
4 |
+
from src.core.speechtotext import SpeechToText
|
5 |
+
from src.core.texttospeech import TextToSpeech
|
6 |
+
from src.core.texttotext import ConversationHandler
|
7 |
+
import os
|
8 |
+
app = FastAPI()

# Shared pipeline singletons, constructed once at import time.
# Each client reads its API key from the environment at construction.
spt = SpeechToText()         # speech -> text (Groq Whisper)
ttt = ConversationHandler()  # text   -> text (Groq LLM reply)
tts = TextToSpeech()         # text   -> speech (ElevenLabs)
|
12 |
+
@app.websocket("/ws/voicechat")
async def websocket_endpoint(websocket: WebSocket):
    """Full-duplex voice-chat endpoint.

    Protocol: the client streams raw audio as binary frames.  A ~3 s gap
    with no frames is treated as end-of-utterance; the buffered audio is
    transcribed, answered by the LLM, synthesized to speech, and returned
    as a single JSON message (audio base64-encoded).  After each reply we
    wait for a text frame from the client as a playback ack before
    listening for the next utterance.
    """
    await websocket.accept()
    print("User connected.")
    audio_buffer = bytearray()

    try:
        while True:
            try:
                # Accumulate incoming audio; the 3 s timeout marks silence.
                audio_data = await asyncio.wait_for(websocket.receive_bytes(), timeout=3.0)
                print(f"Received {len(audio_data)} bytes")
                audio_buffer.extend(audio_data)

            except asyncio.TimeoutError:
                if len(audio_buffer) > 0:
                    print("Silence detected. Processing speech...")
                    # NOTE: method name is (deliberately) the misspelled one
                    # exposed by SpeechToText.
                    transcript = await spt.trancribe_audio(audio_buffer)
                    audio_buffer.clear()

                    if transcript:
                        print(f"User said: {transcript}")
                        response = await ttt.handle_conversation(transcript)
                        if response:
                            print(f"AI Response: {response}")
                            audio = await tts.synthesize(response)
                            # synthesize() returns None on failure; skip the
                            # reply instead of crashing in b64encode.
                            if audio:
                                audio_base64 = base64.b64encode(audio).decode("utf-8")
                                await websocket.send_json({
                                    "transcript": transcript,
                                    "response": response,
                                    "audio": audio_base64,
                                    "status": "complete"
                                })
                                # Block until the client acks playback before
                                # listening for the next utterance.
                                await websocket.receive_text()

            except WebSocketDisconnect:
                # Bug fix: WebSocketDisconnect subclasses Exception, so the
                # generic handler below used to swallow it and the loop spun
                # forever on a dead socket.  Re-raise so the outer handler
                # logs the disconnect and the coroutine exits.
                raise
            except Exception as e:
                print(f"Error: {e}")
    except WebSocketDisconnect:
        print("User disconnected.")
|
50 |
+
if __name__ == "__main__":
    import uvicorn

    # Long keep-alive and graceful-shutdown timeouts so in-flight
    # websocket conversations are not cut off mid-reply.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
        timeout_keep_alive=300,
        timeout_graceful_shutdown=600,
    )
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
elevenlabs
groq
python-dotenv
requests
fastapi
websockets
uvicorn
|
src/__init__.py
ADDED
File without changes
|
src/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (142 Bytes). View file
|
|
src/core/__init__.py
ADDED
File without changes
|
src/core/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (147 Bytes). View file
|
|
src/core/__pycache__/speechtotext.cpython-312.pyc
ADDED
Binary file (1.46 kB). View file
|
|
src/core/__pycache__/texttospeech.cpython-312.pyc
ADDED
Binary file (3.87 kB). View file
|
|
src/core/__pycache__/texttotext.cpython-312.pyc
ADDED
Binary file (2.54 kB). View file
|
|
src/core/speechtotext.py
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import wave
|
2 |
+
import io
|
3 |
+
from groq import Groq
|
4 |
+
|
5 |
+
|
6 |
+
class SpeechToText:
    """Thin async wrapper around Groq's Whisper transcription endpoint."""

    def __init__(self):
        # Groq() reads GROQ_API_KEY from the environment.
        self.client = Groq()

    async def transcribe_audio(self, audio_bytes: bytes):
        """Transcribe a WAV payload and return the text, or None on failure.

        NOTE(review): the underlying Groq client call is synchronous and
        blocks the event loop for the duration of the request.
        """
        wav_buffer = io.BytesIO(audio_bytes)
        try:
            transcription = self.client.audio.transcriptions.create(
                file=("audio.wav", wav_buffer),
                model="whisper-large-v3-turbo",
            )
            print(f"the text is {transcription.text}")
            return transcription.text
        except Exception as e:
            # Best-effort: log and signal failure to the caller with None.
            print(f"Error transcribing audio: {e}")
            return None

    # Backward-compatible alias: existing callers use the misspelled name.
    trancribe_audio = transcribe_audio
24 |
+
|
src/core/texttospeech.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from elevenlabs import ElevenLabs,Voice,VoiceSettings,play
|
2 |
+
import os
|
3 |
+
|
4 |
+
class TextToSpeech:
    """Wraps the ElevenLabs client to turn reply text into audio bytes."""

    def __init__(self):
        self.client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))

    async def synthesize(self, text: str):
        """Return synthesized audio bytes for *text*, or None on API failure.

        Raises ValueError for blank input or input over 5000 characters.
        """
        # Guard clauses: reject blank input first, then over-long input.
        if not text.strip():
            raise ValueError("Input text cannot be empty")
        if len(text) > 5000:
            raise ValueError("Input text cannot exceed 5000 characters")
        try:
            voice_profile = Voice(
                voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
                settings=VoiceSettings(stability=0.5, similarity_boost=0.5),
            )
            chunks = self.client.generate(
                text=text,
                voice=voice_profile,
                model=os.getenv("TTS_MODEL_NAME"),
            )
            # generate() yields the audio in chunks; concatenate them.
            return b"".join(chunks)
        except Exception as e:
            # Best-effort: log the error and return None to the caller.
            print(f"Error synthesizing text: {str(e)}")
            return None
|
29 |
+
|
30 |
+
|
31 |
+
|
32 |
+
# Manual smoke test: synthesize a long Nepali passage and play it locally.
# NOTE(review): synthesize() returns None on API failure, in which case
# play() would receive None — confirm before relying on this demo.
if __name__ == "__main__":
    import asyncio
    async def main():
        tts = TextToSpeech()
        audio_bytes = await tts.synthesize('''इपिङ सफ्टवेयरले गुगलको ट्रान्सलिटरेसन सेवा प्रयोग गर्दछ। यसले छिटो र सही टाइपिङ प्रदान गर्दछ, जसले वेबमा नेपाली भाषा टाइप गर्न सजिलो बनाउँछ। तपाईंले अंग्रेजीमा शब्द टाइप गरी स्पेसबार थिचेपछि, उक्त शब्द नेपालीमा रूपान्तरण हुनेछ। तपाईंले ब्याकस्पेस थिचेर वा चयन गरिएको शब्दमा क्लिक गरेर थप विकल्पहरू पनि प्राप्त गर्न सक्नुहुन्छ। यो प्रक्रिया छिटो छ र असीमित क्यारेक्टरहरू र शब्दहरू रूपान्तरण गर्न सक्षम छ। अझै, स्पेसबार थिचेपछि, पाठ तपाईंको कम्प्युटरमा स्वचालित रूपमा सुरक्षित हुनेछ, जसले ब्राउजर क्र्यास भएमा वा पछि पुन: आगमन गर्दा पहिलेको रूपान्तरण गरिएको पाठ पुन: प्राप्त गर्न मद्दत गर्दछ। ''')
        play(audio_bytes)

    asyncio.run(main())
|
41 |
+
|
42 |
+
|
src/core/texttotext.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from groq import AsyncGroq
|
2 |
+
|
3 |
+
|
4 |
+
|
5 |
+
class ConversationHandler:
    """Generates a short assistant reply for a transcribed utterance via Groq."""

    def __init__(self):
        # AsyncGroq() reads GROQ_API_KEY from the environment.
        self.client = AsyncGroq()

    async def handle_conversation(self, transcription: str) -> str:
        """Send *transcription* to the LLM and return the reply text.

        The system prompt pins the "Callme" persona; max_tokens keeps the
        reply short enough for spoken playback.
        """
        messages = [
            {
                "role": "system",
                "content": '''You are a friendly and engaging virtual assistant named Callme, designed to assist calling agents in creating pleasant and effective phone interactions. Your persona is warm, approachable, and always ready to help, making every caller feel valued.
Your task is to respond to incoming calls with a sweet and succinct greeting that sets a positive tone for the conversation.
Here are some details to keep in mind:
The response should be brief, ideally no longer than a couple of sentences.
Make sure to convey enthusiasm and willingness to assist.
'''
            },
            {
                "role": "user",
                "content": transcription,
            }
        ]
        completion = await self.client.chat.completions.create(
            messages=messages,
            model="llama-3.3-70b-versatile",
            temperature=0.5,   # mild variation, mostly consistent greetings
            max_tokens=125,    # keep replies brief for voice playback
            top_p=1,
            n=1,
        )
        # Fix: previously the content was printed here as leftover debugging,
        # duplicating the caller's own logging.
        return completion.choices[0].message.content
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
if __name__ == "__main__":
    import asyncio

    async def main():
        # Quick manual check of the conversation pipeline.
        handler = ConversationHandler()
        transcription = await handler.handle_conversation("Hi, I need help with a technical issue.")
        print(transcription)

    asyncio.run(main())
|