<!doctype html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Gemini Live Chat - Voice and Text Interaction</title>
  <style>
    body {
      max-width: 800px;
      margin: 2em auto;
      padding: 0 1em;
      font-family: system-ui, -apple-system, sans-serif;
    }
    #visualizer {
      width: 100%;
      height: 80px;
      background: #f0f0f0;
      border-radius: 4px;
      margin: 0;
    }
    #log {
      background: #f0f0f0;
      padding: 1em;
      border-radius: 4px;
      font-family: monospace;
      max-height: 400px;
      overflow-y: auto;
    }
    .controls {
      margin: 1em 0;
      padding: 1em;
      background: #f8f8f8;
      border-radius: 4px;
    }
    .function-card {
      padding: 0.8em;
      background: white;
      border-radius: 4px;
      box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
    }
    .function-card strong {
      color: #1976d2;
    }
    .function-card ul {
      color: #555;
    }
    button {
      border: none;
      padding: 0.5em 1em;
      border-radius: 3px;
      cursor: pointer;
      transition: opacity 0.2s;
    }
    button:hover {
      opacity: 0.9;
    }
    #connectButton {
      background: #2196f3;
      color: white;
    }
    .voice-start {
      background: #4caf50;
      color: white;
    }
    .voice-stop {
      background: #f44336;
      color: white;
    }
  </style>
</head>
<body>
  <h1>Gemini Live Chat</h1>
  <p>Interactive voice and text chat powered by Gemini AI that supports client-side function calling, code execution (server-side), and Google search capabilities.</p>
  <p style="font-size: 0.9em; color: #666;">For server-side function calling, visit: <a href="https://huggingface.co/spaces/Nirav121/gemini-live" target="_blank" rel="noopener noreferrer">Gemini Live Function Call</a></p>

  <!-- Overview of the functions the model can call on this page -->
  <div class="controls" style="background: #e3f2fd;">
    <h3 style="margin-top: 0;">Available Functions:</h3>
    <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1em;">
      <div class="function-card">
        <strong>💡 Light &amp; Theme Control</strong>
        <ul style="margin: 0.5em 0; padding-left: 1.5em;">
          <li>Turn lights on (logs only)</li>
          <li>Turn lights off (logs only)</li>
          <li>Change background color</li>
        </ul>
      </div>
      <div class="function-card">
        <strong>🔍 Search</strong>
        <ul style="margin: 0.5em 0; padding-left: 1.5em;">
          <li>Google search</li>
        </ul>
      </div>
      <div class="function-card">
        <strong>💻 Code</strong>
        <ul style="margin: 0.5em 0; padding-left: 1.5em;">
          <li>Execute code</li>
          <li>Run commands</li>
        </ul>
      </div>
    </div>
    <p style="margin: 0.5em 0 0 0; font-size: 0.9em; color: #666;">
      Try saying: "Turn on the lights", "Change background to blue" or ask any question!
    </p>
  </div>

  <!-- Connection / voice controls and the text-input row -->
  <div class="controls">
    <div style="display: flex; align-items: center; justify-content: space-between; gap: 1em;">
      <div style="display: flex; align-items: center; gap: 1em;">
        <div>
          <span style="font-weight: 500; color: #666;">Server:</span>
          <span id="connectionStatus" style="display: inline-block; padding: 0.3em 0.6em; margin-left: 0.5em; border-radius: 3px; background: #f44336; color: white;">Not connected</span>
        </div>
        <div id="micStatus" style="display: none;">
          <span style="font-weight: 500; color: #666;">Voice:</span>
          <span style="display: inline-block; padding: 0.3em 0.6em; margin-left: 0.5em; border-radius: 3px; background: #4caf50; color: white;">Recording</span>
        </div>
      </div>
      <div style="display: flex; gap: 0.5em;">
        <button id="connectButton" type="button" onclick="toggleConnection()">
          <span style="margin-right: 0.3em;">🔌</span> Connect to Server
        </button>
        <button id="voiceStartButton" class="voice-start" type="button" onclick="startCapture()" style="display: none;">
          <span style="margin-right: 0.3em;">🎤</span> Start Voice Chat
        </button>
        <button id="voiceStopButton" class="voice-stop" type="button" onclick="stopCapture()" style="display: none;">
          <span style="margin-right: 0.3em;">⏹️</span> Stop Voice Chat
        </button>
      </div>
    </div>
    <div style="margin-top: 1em; display: flex; gap: 0.5em;">
      <input type="text" id="textMessage" placeholder="Type your message here" aria-label="Message to send" style="flex: 1; padding: 0.5em; border: 1px solid #ddd; border-radius: 3px;"
             onkeydown="if(event.key === 'Enter') { event.preventDefault(); sendText(); }">
      <button type="button" onclick="sendText()" style="white-space: nowrap;">
        <span style="margin-right: 0.3em;">📤</span> Send
      </button>
    </div>
  </div>

  <!-- Audio playback visualizer -->
  <div class="controls">
    <canvas id="visualizer"></canvas>
  </div>

  <!-- Per-category log filters -->
  <div style="margin: 1em 0;">
    <strong>Log Settings:</strong><br>
    <label><input type="checkbox" id="logWebSocket"> WebSocket Events</label>
    <label style="margin-left: 1em"><input type="checkbox" id="logAudio"> Audio Events</label>
    <label style="margin-left: 1em"><input type="checkbox" id="logText"> Text Events</label>
    <label style="margin-left: 1em"><input type="checkbox" id="logFunction" checked> Function Events</label>
    <label style="margin-left: 1em"><input type="checkbox" id="logError" checked> Error Events</label>
  </div>

  <pre id="log"></pre>
  <script>
// ---- Shared application state ----
let socket;                // WebSocket connection to the backend
let playbackCtx = null;    // AudioContext used for playing server audio
let nextPlaybackTime = 0;  // Timeline position for the next scheduled chunk
let audioCtx;              // AudioContext used for microphone capture
let scriptNode;            // ScriptProcessorNode forwarding mic samples
let micStream;             // MediaStream returned by getUserMedia
let isCapturing = false;   // True while the microphone is recording
let audioSeq = 0;          // Sequence number attached to outgoing audio chunks
let scheduledSources = []; // Track scheduled audio sources (for barge-in clearing)
let analyser;              // AnalyserNode feeding the visualizer
let visualizerCanvas;      // <canvas id="visualizer"> element
let visualizerCtx;         // 2D drawing context of the visualizer canvas
let animationFrame;        // requestAnimationFrame handle for the visualizer loop
/**
 * Reflect the WebSocket connection state in the UI: status pill colour/text,
 * connect-button label, and visibility of the voice-start button.
 * Disconnecting while recording also stops the microphone capture.
 * @param {boolean} connected - true if the socket is open.
 */
function updateConnectionStatus(connected) {
  const statusEl = document.getElementById('connectionStatus');
  const connectButton = document.getElementById('connectButton');
  const voiceStartButton = document.getElementById('voiceStartButton');
  if (connected) {
    statusEl.textContent = 'Connected';
    statusEl.style.background = '#4caf50';
    connectButton.textContent = '🔌 Disconnect Server';
    voiceStartButton.style.display = '';
  } else {
    statusEl.textContent = 'Not connected';
    statusEl.style.background = '#f44336';
    connectButton.textContent = '🔌 Connect to Server';
    voiceStartButton.style.display = 'none';
    // Also stop recording if we're disconnected
    if (isCapturing) {
      stopCapture();
    }
  }
}
/**
 * Toggle the microphone-related UI between recording and idle states:
 * shows the "Recording" pill and swaps the start/stop voice buttons.
 * @param {boolean} recording - true while the microphone is capturing.
 */
function updateMicStatus(recording) {
  const micStatus = document.getElementById('micStatus');
  const voiceStartButton = document.getElementById('voiceStartButton');
  const voiceStopButton = document.getElementById('voiceStopButton');
  if (recording) {
    micStatus.style.display = '';
    voiceStartButton.style.display = 'none';
    voiceStopButton.style.display = '';
  } else {
    micStatus.style.display = 'none';
    voiceStartButton.style.display = '';
    voiceStopButton.style.display = 'none';
  }
}
/** Connect to the server if disconnected, otherwise close the socket. */
function toggleConnection() {
  if (socket && socket.readyState === WebSocket.OPEN) {
    socket.close();
  } else {
    connectWebSocket();
  }
}
/**
 * Prepend a timestamped message to the on-page log, filtered by the
 * per-category checkboxes. Objects are pretty-printed as JSON.
 *
 * FIX: the filter map is keyed in lowercase, but many call sites pass
 * capitalized categories ("Audio", "WebSocket", "Error"), which previously
 * made shouldLog[category] undefined and silently dropped those messages.
 * Normalizing the category restores logging for all call sites.
 *
 * @param {string} category - one of websocket|audio|text|function|error (any casing).
 * @param {...*} args - values to log; objects are JSON-stringified.
 */
function logMessage(category, ...args) {
  const logElement = document.getElementById('log');
  const key = String(category).toLowerCase();
  const shouldLog = {
    'websocket': document.getElementById('logWebSocket').checked,
    'audio': document.getElementById('logAudio').checked,
    'text': document.getElementById('logText').checked,
    'function': document.getElementById('logFunction').checked,
    'error': document.getElementById('logError').checked
  };
  if (shouldLog[key]) {
    const timestamp = new Date().toLocaleTimeString();
    const message = `[${timestamp}] [${category}] ${args.map(arg =>
      typeof arg === 'object' ? JSON.stringify(arg, null, 2) : arg
    ).join(' ')}`;
    // Newest entries first.
    logElement.textContent = message + '\n' + logElement.textContent;
  }
}
/**
 * Stop and discard every scheduled playback source (used for "barge-in":
 * when the user speaks or sends text, pending model audio is cancelled),
 * then reset the playback timeline to "now".
 */
function clearScheduledAudio() {
  // Stop and disconnect all scheduled audio sources
  while (scheduledSources.length > 0) {
    const source = scheduledSources.pop();
    try {
      source.stop();
      source.disconnect();
    } catch (err) {
      // Ignore errors if source already finished playing
    }
  }
  // Reset next playback time
  if (playbackCtx) {
    nextPlaybackTime = playbackCtx.currentTime;
  }
  logMessage("Audio", "Cleared all scheduled audio");
}
/**
 * Prepare the visualizer canvas (matching its pixel resolution to its CSS
 * size) and lazily create the AnalyserNode once a playback context exists.
 */
function setupVisualizer() {
  visualizerCanvas = document.getElementById('visualizer');
  visualizerCtx = visualizerCanvas.getContext('2d');
  // Make canvas resolution match display size
  const rect = visualizerCanvas.getBoundingClientRect();
  visualizerCanvas.width = rect.width;
  visualizerCanvas.height = rect.height;
  if (!analyser && playbackCtx) {
    analyser = playbackCtx.createAnalyser();
    analyser.fftSize = 256; // Reduced for wider bars
    analyser.minDecibels = -90;
    analyser.maxDecibels = -10;
    analyser.smoothingTimeConstant = 0.85;
  }
}
/**
 * Render one frame of the frequency-bar visualizer, mirrored around the
 * vertical centre of the canvas, then schedule the next frame.
 * No-op until the analyser exists.
 */
function drawVisualizer() {
  if (!analyser) return;
  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);
  analyser.getByteFrequencyData(dataArray);
  // Clear to the canvas background colour.
  visualizerCtx.fillStyle = '#f0f0f0';
  visualizerCtx.fillRect(0, 0, visualizerCanvas.width, visualizerCanvas.height);
  const barWidth = (visualizerCanvas.width / bufferLength) * 2.5;
  const centerY = visualizerCanvas.height / 2;
  let x = 0;
  for (let i = 0; i < bufferLength; i++) {
    // Half height, since each bar extends both up and down from centre.
    const barHeight = (dataArray[i] / 255) * (visualizerCanvas.height / 2);
    // Create gradient for top half (going up)
    const gradientTop = visualizerCtx.createLinearGradient(0, centerY, 0, centerY - barHeight);
    gradientTop.addColorStop(0, '#4caf50'); // Green at center
    gradientTop.addColorStop(1, '#81c784'); // Lighter green at top
    // Create gradient for bottom half (going down)
    const gradientBottom = visualizerCtx.createLinearGradient(0, centerY, 0, centerY + barHeight);
    gradientBottom.addColorStop(0, '#4caf50'); // Green at center
    gradientBottom.addColorStop(1, '#81c784'); // Lighter green at bottom
    // Draw top half of the bar
    visualizerCtx.fillStyle = gradientTop;
    visualizerCtx.fillRect(x, centerY - barHeight, barWidth, barHeight);
    // Draw bottom half of the bar
    visualizerCtx.fillStyle = gradientBottom;
    visualizerCtx.fillRect(x, centerY, barWidth, barHeight);
    x += barWidth + 1; // Add 1 pixel gap between bars
  }
  animationFrame = requestAnimationFrame(drawVisualizer);
}
/** Cancel the animation loop and blank the visualizer canvas. */
function stopVisualizer() {
  if (animationFrame) {
    cancelAnimationFrame(animationFrame);
    animationFrame = null;
  }
  if (visualizerCtx) {
    visualizerCtx.fillStyle = '#f0f0f0';
    visualizerCtx.fillRect(0, 0, visualizerCanvas.width, visualizerCanvas.height);
  }
}
/**
 * Set the page background colour (called by the model's change_background
 * function) and return a confirmation string for the tool response.
 * @param {string} color - any CSS colour value.
 * @returns {Promise<string>} confirmation message.
 */
async function changeBackgroundColor(color) {
  document.body.style.backgroundColor = color;
  return `Background color changed to ${color}`;
}
/**
 * Open the WebSocket to the backend (/ws on the current host, ws:// or
 * wss:// to match the page protocol) and wire up all handlers:
 *  - 'audio' messages: base64 16-bit PCM at 24 kHz; decoded, converted to
 *    Float32, scheduled gaplessly on the playback AudioContext, and routed
 *    through the analyser for visualization.
 *  - 'function_call' messages: dispatches the supported client-side
 *    functions and returns a tool_call_response for each call.
 */
function connectWebSocket() {
  try {
    const wsUrl = `${window.location.protocol === 'https:' ? 'wss:' : 'ws:'}//${window.location.host}/ws`;
    socket = new WebSocket(wsUrl);
    socket.onopen = () => {
      logMessage('websocket', 'Connected to server');
      updateConnectionStatus(true);
    };
    socket.onclose = () => {
      logMessage('websocket', 'Disconnected from server');
      updateConnectionStatus(false);
    };
    socket.onmessage = async (event) => {
      // Guard against malformed frames so one bad message doesn't kill the handler.
      let message;
      try {
        message = JSON.parse(event.data);
      } catch (err) {
        logMessage('error', 'Received malformed message:', String(err));
        return;
      }
      const messageType = message.type;
      if (messageType === 'audio') {
        // Handle audio data
        logMessage('audio', 'Received audio chunk from server');
        const arrayBuffer = base64ToArrayBuffer(message.payload);
        if (!playbackCtx) {
          playbackCtx = new (window.AudioContext || window.webkitAudioContext)();
          setupVisualizer();
        }
        // Convert Int16 PCM to Float32
        const int16View = new Int16Array(arrayBuffer);
        const float32Buffer = new Float32Array(int16View.length);
        for (let i = 0; i < int16View.length; i++) {
          float32Buffer[i] = int16View[i] / 32768;
        }
        // Create audio buffer with correct sample rate
        const sampleRate = 24000; // Sample rate from server
        const audioBuffer = playbackCtx.createBuffer(1, float32Buffer.length, sampleRate);
        audioBuffer.copyToChannel(float32Buffer, 0);
        const source = playbackCtx.createBufferSource();
        source.buffer = audioBuffer;
        // Connect through analyser for visualization if available
        if (analyser) {
          source.connect(analyser);
          analyser.connect(playbackCtx.destination);
          if (!animationFrame) {
            drawVisualizer();
          }
        } else {
          source.connect(playbackCtx.destination);
        }
        // Schedule the audio to play at the right time (back-to-back chunks)
        const startTime = Math.max(nextPlaybackTime, playbackCtx.currentTime);
        source.start(startTime);
        nextPlaybackTime = startTime + audioBuffer.duration;
        // Keep track of scheduled sources
        scheduledSources.push(source);
        // Clean up source when it finishes playing
        source.onended = () => {
          const index = scheduledSources.indexOf(source);
          if (index > -1) {
            scheduledSources.splice(index, 1);
          }
          // Stop visualizer if no more audio playing
          if (scheduledSources.length === 0) {
            stopVisualizer();
          }
        };
      }
      else if (messageType === 'function_call') {
        // Handle function calls from server
        logMessage('function', 'Received function call:', message.payload);
        const functionCalls = message.payload.functionCalls;
        for (const fc of functionCalls) {
          const functionName = fc.name;
          const functionId = fc.id;
          // Handle different functions
          let result = 'ok';
          if (functionName === 'turn_on_the_lights') {
            logMessage('function', 'Turning on the lights');
            // Simulate turning on lights
            result = 'Lights turned on successfully';
          }
          else if (functionName === 'turn_off_the_lights') {
            logMessage('function', 'Turning off the lights');
            // Simulate turning off lights
            result = 'Lights turned off successfully';
          }
          else if (functionName === 'change_background') {
            const color = fc.args?.color;
            if (color) {
              logMessage('function', 'Changing background color to:', color);
              result = await changeBackgroundColor(color);
            } else {
              result = 'Error: No color specified';
            }
          }
          // Send response back to server
          const response = {
            type: 'tool_call_response',
            payload: {
              id: functionId,
              name: functionName,
              response: {
                result: {
                  string_value: result
                }
              }
            }
          };
          socket.send(JSON.stringify(response));
          logMessage('function', 'Sent function response:', response);
        }
      }
    };
    socket.onerror = (error) => {
      logMessage('error', 'WebSocket error:', error);
    };
  } catch (error) {
    logMessage('error', 'Failed to connect:', error);
  }
}
/**
 * Start streaming microphone audio to the server: request mic access,
 * build a ScriptProcessorNode pipeline, resample each buffer to 16 kHz
 * 16-bit mono PCM, and send it base64-encoded over the WebSocket.
 * Detected speech clears any pending playback (barge-in).
 */
async function startCapture() {
  if (!socket || socket.readyState !== WebSocket.OPEN) {
    logMessage("WebSocket", "Not connected. Click 'Connect to Server' first.");
    return;
  }
  if (isCapturing) {
    logMessage("Audio", "Already capturing!");
    return;
  }
  isCapturing = true;
  updateMicStatus(true);
  logMessage("Audio", "Starting microphone capture...");
  try {
    micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    logMessage("Audio", "Got microphone access");
    audioCtx = new (window.AudioContext || window.webkitAudioContext)();
    logMessage("Audio", "Created AudioContext with sample rate:", audioCtx.sampleRate);
    // Create a media source from the mic stream
    const source = audioCtx.createMediaStreamSource(micStream);
    logMessage("Audio", "Created MediaStreamSource");
    // Create a ScriptProcessorNode
    const bufferSize = 4096; // You can adjust this
    const inputChannels = 1;
    const outputChannels = 1;
    scriptNode = audioCtx.createScriptProcessor(bufferSize, inputChannels, outputChannels);
    logMessage("Audio", "Created ScriptProcessorNode with buffer size:", bufferSize);
    scriptNode.onaudioprocess = (audioEvent) => {
      if (!isCapturing) return;
      // Get raw samples and resample to 16kHz
      const inputBuffer = audioEvent.inputBuffer.getChannelData(0);
      // Check if there's actual audio input (not just silence)
      const hasAudio = inputBuffer.some(sample => Math.abs(sample) > 0.01); // Threshold for noise
      if (hasAudio) {
        clearScheduledAudio(); // Only clear when we detect actual audio input
      }
      const resampled = resampleAudio(inputBuffer, audioCtx.sampleRate, 16000);
      // Convert resampled audio to 16-bit PCM
      const pcm16 = floatTo16BitPCM(resampled);
      // Encode as base64 and send over WebSocket
      const bytes = new Uint8Array(pcm16.buffer);
      const b64 = btoa(String.fromCharCode(...bytes));
      const audioMsg = {
        type: "audio",
        payload: b64,
        seq: audioSeq++,
        config: {
          sampleRate: 16000,
          bitDepth: 16,
          channels: 1
        }
      };
      logMessage("Audio", "Processing chunk. Seq:", audioMsg.seq);
      try {
        if (socket.readyState === WebSocket.OPEN) {
          socket.send(JSON.stringify(audioMsg));
        } else {
          logMessage("WebSocket", "Not open, stopping capture");
          stopCapture();
        }
      } catch (err) {
        logMessage("Error", "Failed to send audio:", err);
        stopCapture();
      }
    };
    // Connect the pipeline: mic -> script -> destination.
    // NOTE(fix): some browsers (notably Chrome) never fire onaudioprocess
    // unless the ScriptProcessorNode is connected to a destination. Its
    // output buffer is never written, so this produces silence (no echo).
    source.connect(scriptNode);
    scriptNode.connect(audioCtx.destination);
    logMessage("Audio", "Connected audio pipeline");
    logMessage("Audio", "Recording...");
  } catch (err) {
    logMessage("Error", "Failed to get microphone access:", err);
    // FIX: roll back UI state and release any stream we already acquired,
    // instead of leaving the "Recording" pill stuck on screen.
    isCapturing = false;
    updateMicStatus(false);
    if (micStream) {
      micStream.getTracks().forEach(track => track.stop());
      micStream = null;
    }
  }
}
/**
 * Stop microphone capture: tear down the processing node, release the
 * media tracks, and close the capture AudioContext. Safe to call when
 * not capturing (no-op).
 */
function stopCapture() {
  if (!isCapturing) return;
  isCapturing = false;
  updateMicStatus(false);
  logMessage("Audio", "Stopped microphone capture");
  if (scriptNode) {
    scriptNode.disconnect();
    scriptNode.onaudioprocess = null;
    scriptNode = null;
  }
  if (micStream) {
    // Stop all tracks so the browser releases the microphone indicator
    micStream.getTracks().forEach(track => track.stop());
    micStream = null;
  }
  if (audioCtx) {
    audioCtx.close();
    audioCtx = null;
  }
}
/**
 * Convert float samples in [-1, 1] to signed 16-bit PCM.
 * Values outside the range are clamped; negatives scale by 0x8000 and
 * positives by 0x7FFF so both endpoints map to the full int16 range.
 * @param {Float32Array|number[]} floatSamples
 * @returns {Int16Array}
 */
function floatTo16BitPCM(floatSamples) {
  const out = new Int16Array(floatSamples.length);
  for (let i = 0; i < floatSamples.length; i++) {
    let s = Math.max(-1, Math.min(1, floatSamples[i]));
    // scale range
    s = s < 0 ? s * 0x8000 : s * 0x7FFF;
    out[i] = s;
  }
  return out;
}
/**
 * Nearest-neighbour resampling from fromRate to toRate.
 * Simple (no filtering/interpolation), which is adequate for downsampling
 * speech to 16 kHz before transmission.
 * @param {Float32Array} inputBuffer - source samples.
 * @param {number} fromRate - source sample rate in Hz.
 * @param {number} toRate - target sample rate in Hz.
 * @returns {Float32Array} resampled buffer of length round(len * toRate/fromRate).
 */
function resampleAudio(inputBuffer, fromRate, toRate) {
  const ratio = toRate / fromRate;
  const newLength = Math.round(inputBuffer.length * ratio);
  const resampled = new Float32Array(newLength);
  for (let i = 0; i < newLength; i++) {
    const index = Math.round(i / ratio);
    // Clamp so rounding never reads past the last sample.
    resampled[i] = inputBuffer[Math.min(index, inputBuffer.length - 1)];
  }
  return resampled;
}
/**
 * Send the text-input contents to the server as a {type:"text"} message.
 * Sending counts as barge-in, so any pending model audio is cancelled
 * first. No-op (with a log entry) when empty or disconnected.
 */
function sendText() {
  const textInput = document.getElementById("textMessage");
  const text = textInput.value.trim();
  if (text && socket && socket.readyState === WebSocket.OPEN) {
    // Clear any scheduled audio before sending text
    clearScheduledAudio();
    socket.send(JSON.stringify({ type: "text", content: text }));
    logMessage("Text", "Sent:", text);
    textInput.value = "";
  } else {
    logMessage("WebSocket", "Not connected or text is empty");
  }
}
/**
 * Decode a base64 string into an ArrayBuffer of raw bytes.
 * Uses the global atob (identical to window.atob in browsers).
 * @param {string} b64 - base64-encoded binary data.
 * @returns {ArrayBuffer}
 */
function base64ToArrayBuffer(b64) {
  const binaryString = atob(b64);
  const len = binaryString.length;
  const bytes = new Uint8Array(len);
  for (let i = 0; i < len; i++) {
    bytes[i] = binaryString.charCodeAt(i);
  }
  return bytes.buffer;
}
</script> | |
</body> | |
</html> | |