freddyaboulton HF staff committed on
Commit
55525a7
·
verified ·
1 Parent(s): d75ee3c

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +25 -15
  2. index.html +61 -18
app.py CHANGED
@@ -13,6 +13,7 @@ from fastapi.responses import HTMLResponse
13
  from fastrtc import (
14
  AsyncStreamHandler,
15
  Stream,
 
16
  get_twilio_turn_credentials,
17
  )
18
  from google import genai
@@ -62,12 +63,18 @@ class GeminiHandler(AsyncStreamHandler):
62
  )
63
 
64
  async def start_up(self):
65
- await self.wait_for_args()
66
- api_key, voice_name = self.latest_args[1:]
67
- client = genai.Client(
68
- api_key=api_key or os.getenv("GEMINI_API_KEY"),
69
- http_options={"api_version": "v1alpha"},
70
- )
 
 
 
 
 
 
71
  config = LiveConnectConfig(
72
  response_modalities=["AUDIO"], # type: ignore
73
  speech_config=SpeechConfig(
@@ -78,15 +85,18 @@ class GeminiHandler(AsyncStreamHandler):
78
  )
79
  ),
80
  )
81
- async with client.aio.live.connect(
82
- model="gemini-2.0-flash-exp", config=config
83
- ) as session:
84
- async for audio in session.start_stream(
85
- stream=self.stream(), mime_type="audio/pcm"
86
- ):
87
- if audio.data:
88
- array = np.frombuffer(audio.data, dtype=np.int16)
89
- self.output_queue.put_nowait(array)
 
 
 
90
 
91
  async def stream(self) -> AsyncGenerator[bytes, None]:
92
  while not self.quit.is_set():
 
13
  from fastrtc import (
14
  AsyncStreamHandler,
15
  Stream,
16
+ WebRTCError,
17
  get_twilio_turn_credentials,
18
  )
19
  from google import genai
 
63
  )
64
 
65
  async def start_up(self):
66
+ if not self.phone_mode:
67
+ await self.wait_for_args()
68
+ api_key, voice_name = self.latest_args[1:]
69
+ else:
70
+ api_key, voice_name = None, "Puck"
71
+ try:
72
+ client = genai.Client(
73
+ api_key=api_key or os.getenv("GEMINI_API_KEY"),
74
+ http_options={"api_version": "v1alpha"},
75
+ )
76
+ except Exception as e:
77
+ raise WebRTCError(str(e))
78
  config = LiveConnectConfig(
79
  response_modalities=["AUDIO"], # type: ignore
80
  speech_config=SpeechConfig(
 
85
  )
86
  ),
87
  )
88
+ try:
89
+ async with client.aio.live.connect(
90
+ model="gemini-2.0-flash-exp", config=config
91
+ ) as session:
92
+ async for audio in session.start_stream(
93
+ stream=self.stream(), mime_type="audio/pcm"
94
+ ):
95
+ if audio.data:
96
+ array = np.frombuffer(audio.data, dtype=np.int16)
97
+ self.output_queue.put_nowait(array)
98
+ except Exception as e:
99
+ raise WebRTCError(str(e))
100
 
101
  async def stream(self) -> AsyncGenerator[bytes, None]:
102
  while not self.quit.is_set():
index.html CHANGED
@@ -147,11 +147,29 @@
147
  transform: translateX(-0%) scale(var(--audio-level, 1));
148
  transition: transform 0.1s ease;
149
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  </style>
151
  </head>
152
 
153
 
154
  <body>
 
 
155
  <div style="text-align: center">
156
  <h1>Gemini Voice Chat</h1>
157
  <p>Speak with Gemini using real-time audio streaming</p>
@@ -229,6 +247,17 @@
229
  }
230
  }
231
 
 
 
 
 
 
 
 
 
 
 
 
232
  async function setupWebRTC() {
233
  const config = __RTC_CONFIGURATION__;
234
  peerConnection = new RTCPeerConnection(config);
@@ -286,7 +315,24 @@
286
 
287
  // Create data channel for messages
288
  dataChannel = peerConnection.createDataChannel('text');
289
- dataChannel.onmessage = handleMessage;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
 
291
  // Create and send offer
292
  const offer = await peerConnection.createOffer();
@@ -317,26 +363,22 @@
317
  });
318
 
319
  const serverResponse = await response.json();
 
 
 
 
 
 
 
 
 
 
320
  await peerConnection.setRemoteDescription(serverResponse);
321
  } catch (err) {
322
  console.error('Error setting up WebRTC:', err);
323
- }
324
- }
325
-
326
- function handleMessage(event) {
327
- const eventJson = JSON.parse(event.data);
328
- if (eventJson.type === "send_input") {
329
- fetch('/input_hook', {
330
- method: 'POST',
331
- headers: {
332
- 'Content-Type': 'application/json',
333
- },
334
- body: JSON.stringify({
335
- webrtc_id: webrtc_id,
336
- api_key: apiKeyInput.value,
337
- voice_name: voiceSelect.value
338
- })
339
- });
340
  }
341
  }
342
 
@@ -364,6 +406,7 @@
364
  if (audioContext) {
365
  audioContext.close();
366
  }
 
367
  }
368
 
369
  startButton.addEventListener('click', () => {
 
147
  transform: translateX(-0%) scale(var(--audio-level, 1));
148
  transition: transform 0.1s ease;
149
  }
150
+
151
+ /* Add styles for toast notifications */
152
+ .toast {
153
+ position: fixed;
154
+ top: 20px;
155
+ left: 50%;
156
+ transform: translateX(-50%);
157
+ background-color: #f44336;
158
+ color: white;
159
+ padding: 16px 24px;
160
+ border-radius: 4px;
161
+ font-size: 14px;
162
+ z-index: 1000;
163
+ display: none;
164
+ box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
165
+ }
166
  </style>
167
  </head>
168
 
169
 
170
  <body>
171
+ <!-- Add toast element after body opening tag -->
172
+ <div id="error-toast" class="toast"></div>
173
  <div style="text-align: center">
174
  <h1>Gemini Voice Chat</h1>
175
  <p>Speak with Gemini using real-time audio streaming</p>
 
247
  }
248
  }
249
 
250
+ function showError(message) {
251
+ const toast = document.getElementById('error-toast');
252
+ toast.textContent = message;
253
+ toast.style.display = 'block';
254
+
255
+ // Hide toast after 5 seconds
256
+ setTimeout(() => {
257
+ toast.style.display = 'none';
258
+ }, 5000);
259
+ }
260
+
261
  async function setupWebRTC() {
262
  const config = __RTC_CONFIGURATION__;
263
  peerConnection = new RTCPeerConnection(config);
 
315
 
316
  // Create data channel for messages
317
  dataChannel = peerConnection.createDataChannel('text');
318
+ dataChannel.onmessage = (event) => {
319
+ const eventJson = JSON.parse(event.data);
320
+ if (eventJson.type === "error") {
321
+ showError(eventJson.message);
322
+ } else if (eventJson.type === "send_input") {
323
+ fetch('/input_hook', {
324
+ method: 'POST',
325
+ headers: {
326
+ 'Content-Type': 'application/json',
327
+ },
328
+ body: JSON.stringify({
329
+ webrtc_id: webrtc_id,
330
+ api_key: apiKeyInput.value,
331
+ voice_name: voiceSelect.value
332
+ })
333
+ });
334
+ }
335
+ };
336
 
337
  // Create and send offer
338
  const offer = await peerConnection.createOffer();
 
363
  });
364
 
365
  const serverResponse = await response.json();
366
+
367
+ if (serverResponse.status === 'failed') {
368
+ showError(serverResponse.meta.error === 'concurrency_limit_reached'
369
+ ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
370
+ : serverResponse.meta.error);
371
+ stop();
372
+ startButton.textContent = 'Start Recording';
373
+ return;
374
+ }
375
+
376
  await peerConnection.setRemoteDescription(serverResponse);
377
  } catch (err) {
378
  console.error('Error setting up WebRTC:', err);
379
+ showError('Failed to establish connection. Please try again.');
380
+ stop();
381
+ startButton.textContent = 'Start Recording';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  }
383
  }
384
 
 
406
  if (audioContext) {
407
  audioContext.close();
408
  }
409
+ updateButtonState();
410
  }
411
 
412
  startButton.addEventListener('click', () => {