puzan789 committed on
Commit
8d1b3ab
·
1 Parent(s): 4e31ab5
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a base image
FROM python:3.11-slim

# Unbuffered stdout/stderr so container logs appear immediately
ENV PYTHONUNBUFFERED=1

# Create and set the working directory
WORKDIR /app

# Copy requirements.txt first so the dependency install below is cached
# as its own layer and only re-runs when requirements change
COPY requirements.txt /app/

# Install any needed dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the current directory contents into the container at /app
COPY . /app/

# Port the FastAPI/uvicorn server listens on (must match app.py)
EXPOSE 7860

# app.py starts uvicorn itself in its __main__ block, so plain
# "python app.py" brings the server up
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, WebSocket,WebSocketDisconnect
2
+ import asyncio
3
+ import base64
4
+ from src.core.speechtotext import SpeechToText
5
+ from src.core.texttospeech import TextToSpeech
6
+ from src.core.texttotext import ConversationHandler
7
+ import os
8
# FastAPI application plus module-level pipeline singletons shared by every
# websocket connection handled by this process.
app = FastAPI()
spt = SpeechToText()         # speech-to-text (Groq Whisper, see src/core/speechtotext.py)
ttt = ConversationHandler()  # text-to-text LLM reply (see src/core/texttotext.py)
tts = TextToSpeech()         # text-to-speech (ElevenLabs, see src/core/texttospeech.py)
12
@app.websocket("/ws/voicechat")
async def websocket_endpoint(websocket: WebSocket):
    """Bidirectional voice-chat endpoint.

    Accumulates raw audio bytes from the client; a 3-second gap in the stream
    is treated as end-of-utterance, after which the audio is run through
    STT -> LLM -> TTS and the transcript, text reply and base64-encoded audio
    are sent back in a single JSON message.
    """
    await websocket.accept()
    print("User connected.")
    audio_buffer = bytearray()  # raw audio chunks for the current utterance

    try:
        while True:
            try:
                # 3 s without data is interpreted as silence / end of speech.
                audio_data = await asyncio.wait_for(websocket.receive_bytes(), timeout=3.0)
                print(f"Received {len(audio_data)} bytes")
                audio_buffer.extend(audio_data)

            except asyncio.TimeoutError:
                if len(audio_buffer) > 0:
                    print("Silence detected. Processing speech...")
                    transcript = await spt.trancribe_audio(audio_buffer)
                    audio_buffer.clear()

                    if transcript:
                        print(f"User said: {transcript}")
                        response = await ttt.handle_conversation(transcript)
                        if response:
                            print(f"AI Response: {response}")
                            audio = await tts.synthesize(response)
                            # BUG FIX: synthesize() returns None on failure;
                            # b64encode(None) would raise and be swallowed below.
                            if audio is not None:
                                audio_base64 = base64.b64encode(audio).decode("utf-8")
                                await websocket.send_json({
                                    "transcript": transcript,
                                    "response": response,
                                    "audio": audio_base64,
                                    "status": "complete"
                                })
                                # Wait for the client's acknowledgement before
                                # listening for the next utterance.
                                await websocket.receive_text()

            except WebSocketDisconnect:
                # BUG FIX: WebSocketDisconnect subclasses Exception, so the
                # generic handler below used to swallow it and the loop kept
                # polling a dead socket forever. Re-raise so the outer
                # handler terminates the session cleanly.
                raise
            except Exception as e:
                print(f"Error: {e}")
    except WebSocketDisconnect:
        print("User disconnected.")
50
if __name__ == '__main__':
    # Run the ASGI app directly; port 7860 matches the Dockerfile's EXPOSE.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=7860,
        timeout_keep_alive=300,
        timeout_graceful_shutdown=600,
    )
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
elevenlabs
groq
python-dotenv
Requests
fastapi
websockets
uvicorn
src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (142 Bytes). View file
 
src/core/__init__.py ADDED
File without changes
src/core/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (147 Bytes). View file
 
src/core/__pycache__/speechtotext.cpython-312.pyc ADDED
Binary file (1.46 kB). View file
 
src/core/__pycache__/texttospeech.cpython-312.pyc ADDED
Binary file (3.87 kB). View file
 
src/core/__pycache__/texttotext.cpython-312.pyc ADDED
Binary file (2.54 kB). View file
 
src/core/speechtotext.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import wave
2
+ import io
3
+ from groq import Groq
4
+
5
+
6
class SpeechToText:
    """Thin wrapper around Groq's Whisper transcription endpoint."""

    def __init__(self):
        # NOTE(review): Groq() presumably reads GROQ_API_KEY from the
        # environment — confirm deployment config.
        self.client = Groq()

    async def trancribe_audio(self, audio_bytes: bytes):
        """Transcribe raw WAV bytes; return the text, or None on failure.

        NOTE(review): the name keeps its original misspelling because
        app.py calls `spt.trancribe_audio(...)`; use the correctly spelled
        alias below in new code.
        """
        wav_buffer = io.BytesIO(audio_bytes)
        try:
            # The Groq SDK call here is synchronous and will block the event
            # loop for the duration of the request — acceptable for a demo,
            # worth offloading to a thread for production.
            transcription = self.client.audio.transcriptions.create(
                file=("audio.wav", wav_buffer),
                model="whisper-large-v3-turbo",
            )
            return transcription.text
        except Exception as e:
            print(f"Error transcribing audio: {e}")
            return None

    # Correctly spelled, backward-compatible alias for new callers.
    transcribe_audio = trancribe_audio
+
src/core/texttospeech.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from elevenlabs import ElevenLabs,Voice,VoiceSettings,play
2
+ import os
3
+
4
class TextToSpeech:
    """Converts text to speech audio using the ElevenLabs API."""

    def __init__(self):
        self.client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))

    async def synthesize(self, text: str):
        """Return synthesized speech for *text* as raw bytes.

        Raises ValueError for empty input or input over 5000 characters.
        Returns None if the ElevenLabs call itself fails.
        """
        # Guard clauses: validate before touching the network.
        if not text.strip():
            raise ValueError("Input text cannot be empty")
        if len(text) > 5000:
            raise ValueError("Input text cannot exceed 5000 characters")
        try:
            voice = Voice(
                voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
                settings=VoiceSettings(stability=0.5, similarity_boost=0.5),
            )
            # generate() yields audio chunks; collect them into one blob.
            chunks = self.client.generate(
                text=text,
                voice=voice,
                model=os.getenv("TTS_MODEL_NAME"),
            )
            return b"".join(chunks)
        except Exception as e:
            print(f"Error synthesizing text: {str(e)}")
            return None
29
+
30
+
31
+
32
if __name__ == "__main__":
    # Manual smoke test: synthesizes a Nepali paragraph and plays it locally.
    # Requires ELEVENLABS_API_KEY / ELEVENLABS_VOICE_ID / TTS_MODEL_NAME in
    # the environment; not exercised by the web app.
    import asyncio
    async def main():
        tts = TextToSpeech()
        audio_bytes = await tts.synthesize('''इपिङ सफ्टवेयरले गुगलको ट्रान्सलिटरेसन सेवा प्रयोग गर्दछ। यसले छिटो र सही टाइपिङ प्रदान गर्दछ, जसले वेबमा नेपाली भाषा टाइप गर्न सजिलो बनाउँछ। तपाईंले अंग्रेजीमा शब्द टाइप गरी स्पेसबार थिचेपछि, उक्त शब्द नेपालीमा रूपान्तरण हुनेछ। तपाईंले ब्याकस्पेस थिचेर वा चयन गरिएको शब्दमा क्लिक गरेर थप विकल्पहरू पनि प्राप्त गर्न सक्नुहुन्छ। यो प्रक्रिया छिटो छ र असीमित क्यारेक्टरहरू र शब्दहरू रूपान्तरण गर्न सक्षम छ। अझै, स्पेसबार थिचेपछि, पाठ तपाईंको कम्प्युटरमा स्वचालित रूपमा सुरक्षित हुनेछ, जसले ब्राउजर क्र्यास भएमा वा पछि पुन: आगमन गर्दा पहिलेको रूपान्तरण गरिएको पाठ पुन: प्राप्त गर्न मद्दत गर्दछ। ''')
        play(audio_bytes)


    asyncio.run(main())
src/core/texttotext.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from groq import AsyncGroq
2
+
3
+
4
+
5
class ConversationHandler:
    """Generates a short, friendly reply to a transcribed user utterance
    via Groq's chat-completion API."""

    def __init__(self):
        # NOTE(review): AsyncGroq() presumably reads GROQ_API_KEY from the
        # environment — confirm deployment config.
        self.client = AsyncGroq()

    async def handle_conversation(self, transcription):
        """Return the assistant's reply to *transcription* (a str).

        The system prompt pins the "Callme" persona and asks for a brief,
        enthusiastic response; max_tokens=125 keeps replies short.
        """
        messages = [
            {
                "role": "system",
                "content":'''You are a friendly and engaging virtual assistant named Callme, designed to assist calling agents in creating pleasant and effective phone interactions. Your persona is warm, approachable, and always ready to help, making every caller feel valued.
Your task is to respond to incoming calls with a sweet and succinct greeting that sets a positive tone for the conversation.
Here are some details to keep in mind:
The response should be brief, ideally no longer than a couple of sentences.
Make sure to convey enthusiasm and willingness to assist.
'''
            },
            {
                "role": "user",
                "content": transcription,
            }
        ]
        completion = await self.client.chat.completions.create(
            messages=messages,
            model="llama-3.3-70b-versatile",
            temperature=0.5,  # mildly creative but mostly consistent
            max_tokens=125,
            top_p=1,
            n=1,
        )
        # Echo the reply to the server log, then hand it to the caller.
        print(completion.choices[0].message.content)
        return completion.choices[0].message.content
35
+
36
+
37
+
38
if __name__ == "__main__":
    # Manual smoke test: requires GROQ_API_KEY in the environment.
    import asyncio

    async def _demo():
        bot = ConversationHandler()
        reply = await bot.handle_conversation("Hi, I need help with a technical issue.")
        print(reply)

    asyncio.run(_demo())