BryanBradfo committed on
Commit
1ea0a09
·
1 Parent(s): 1c7a116

change of everything

Browse files
Files changed (2) hide show
  1. app.py +107 -215
  2. index.html +166 -60
app.py CHANGED
@@ -1,10 +1,7 @@
1
  import json
2
  import os
3
  import time
4
- import asyncio
5
- import threading
6
  from pathlib import Path
7
- from queue import Queue
8
 
9
  import gradio as gr
10
  import numpy as np
@@ -23,229 +20,131 @@ from fastrtc.utils import audio_to_bytes
23
  from gradio.utils import get_space
24
  from groq import Groq
25
  from pydantic import BaseModel
26
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
27
 
28
  load_dotenv()
29
 
30
- # Initialize all clients and models upfront to avoid cold start latency
31
- groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY", ""))
32
-
33
- # Load a small but effective model - initialize once and reuse
34
- print("Loading model...")
35
- model_name = "distilgpt2" # Can be changed to "TinyLlama/TinyLlama-1.1B-Chat-v1.0" for better quality
36
- tokenizer = AutoTokenizer.from_pretrained(model_name)
37
- model = AutoModelForCausalLM.from_pretrained(model_name)
38
- print("Model loaded!")
39
-
40
  tts_client = ElevenLabs(api_key=os.environ["ELEVENLABS_API_KEY"])
41
- tts_model = get_tts_model()
42
 
43
  curr_dir = Path(__file__).parent
44
 
45
- # Pre-warm the model
46
- _ = model.generate(tokenizer.encode("Hello", return_tensors="pt"), max_length=5)
47
- print("Model warmed up")
48
-
49
- # Create a transcription queue for parallel processing
50
- transcription_queue = Queue()
51
- response_queue = Queue()
52
-
53
- # Global session state
54
- sessions = {}
55
-
56
- def transcribe_audio(audio_bytes):
57
- """Transcribe audio using Groq's Whisper API"""
58
- try:
59
- return groq_client.audio.transcriptions.create(
60
- file=("audio-file.mp3", audio_bytes),
61
- model="whisper-large-v3-turbo",
62
- response_format="verbose_json",
63
- ).text
64
- except Exception as e:
65
- print(f"Transcription error: {e}")
66
- return "I couldn't understand that. Could you try again?"
67
-
68
- def generate_response(prompt, session_id):
69
- """Generate response using the model"""
70
- try:
71
- # Get or create session history
72
- if session_id not in sessions:
73
- sessions[session_id] = []
74
-
75
- # Keep only last 3 exchanges for context
76
- history = sessions[session_id][-3:] if sessions[session_id] else []
77
-
78
- # Build context from history
79
- context = "You are a helpful assistant. Keep responses brief and direct.\n"
80
- for item in history:
81
- role = item.get("role", "")
82
- content = item.get("content", "")
83
- if role and content:
84
- prefix = "User: " if role == "user" else "Assistant: "
85
- context += f"{prefix}{content}\n"
86
-
87
- context += f"User: {prompt}\nAssistant:"
88
-
89
- # Encode the prompt
90
- inputs = tokenizer(context, return_tensors="pt")
91
-
92
- # Generate
93
- with torch.no_grad():
94
- output = model.generate(
95
- inputs.input_ids,
96
- max_length=len(inputs.input_ids[0]) + 100,
97
- temperature=0.7,
98
- top_p=0.9,
99
- repetition_penalty=1.2,
100
- do_sample=True,
101
- pad_token_id=tokenizer.eos_token_id
102
- )
103
-
104
- # Decode and clean
105
- full_output = tokenizer.decode(output[0], skip_special_tokens=True)
106
-
107
- # Extract just the assistant's response
108
- response = full_output.split("Assistant:")[-1].strip()
109
-
110
- # Clean up the response (remove repetition)
111
- sentences = []
112
- for sent in response.split('. '):
113
- if sent and sent not in sentences:
114
- sentences.append(sent)
115
-
116
- cleaned_response = '. '.join(sentences)
117
- if not cleaned_response.endswith('.'):
118
- cleaned_response += '.'
119
-
120
- return cleaned_response
121
-
122
- except Exception as e:
123
- print(f"Response generation error: {e}")
124
- return "I'm processing that. One moment please."
125
-
126
- def background_transcription():
127
- """Background worker for transcription"""
128
- while True:
129
- session_id, audio_data = transcription_queue.get()
130
- if session_id is None: # Poison pill
131
- break
132
-
133
- transcript = transcribe_audio(audio_data)
134
- response_queue.put((session_id, "user", transcript))
135
-
136
- # Add to session history
137
- if session_id not in sessions:
138
- sessions[session_id] = []
139
- sessions[session_id].append({"role": "user", "content": transcript})
140
-
141
- # Generate response in the same thread for simplicity
142
- response_text = generate_response(transcript, session_id)
143
- response_queue.put((session_id, "assistant", response_text))
144
-
145
- # Add to session history
146
- sessions[session_id].append({"role": "assistant", "content": response_text})
147
-
148
- transcription_queue.task_done()
149
-
150
- # Start the background worker
151
- transcription_thread = threading.Thread(target=background_transcription, daemon=True)
152
- transcription_thread.start()
153
 
154
- def optimize_tts(text):
155
- """Optimize text for better TTS performance"""
156
- # Break into smaller chunks at natural boundaries
157
- chunks = []
158
- current_chunk = ""
159
-
160
- for sentence in text.split('. '):
161
- if not sentence.strip():
162
- continue
163
-
164
- if len(current_chunk) + len(sentence) > 100:
165
- if current_chunk:
166
- chunks.append(current_chunk.strip())
167
- current_chunk = sentence
168
- else:
169
- if current_chunk:
170
- current_chunk += ". " + sentence
171
- else:
172
- current_chunk = sentence
173
 
174
- if current_chunk:
175
- chunks.append(current_chunk.strip())
 
176
 
177
- # Make sure chunks end with periods
178
- for i in range(len(chunks)):
179
- if not chunks[i].endswith('.'):
180
- chunks[i] += '.'
181
-
182
- return chunks
183
 
184
  def response(
185
  audio: tuple[int, np.ndarray],
186
  chatbot: list[dict] | None = None,
187
- session_id=None
188
  ):
189
- if session_id is None:
190
- session_id = f"session_{time.time()}"
191
-
192
  chatbot = chatbot or []
193
- audio_bytes = audio_to_bytes(audio)
194
-
195
- # Queue the audio for transcription
196
- transcription_queue.put((session_id, audio_bytes))
197
-
198
- # Wait for transcription and response (with timeout for real-time experience)
199
- waited = 0
200
- transcript = None
201
- response_text = None
202
-
203
- # Get user transcript first
204
- while waited < 5: # 5 seconds max wait
205
- if not response_queue.empty():
206
- sid, role, text = response_queue.get()
207
- if sid == session_id and role == "user":
208
- transcript = text
209
- break
210
- time.sleep(0.1)
211
- waited += 0.1
212
-
213
- if transcript is None:
214
- # If transcription is taking too long, provide immediate feedback
215
- transcript = "Processing your message..."
216
-
217
- # Update chatbot with user message
218
- chatbot.append({"role": "user", "content": transcript})
219
- yield AdditionalOutputs(chatbot)
220
-
221
- # Get assistant response
222
- waited = 0
223
- while waited < 7: # 7 seconds max wait
224
- if not response_queue.empty():
225
- sid, role, text = response_queue.get()
226
- if sid == session_id and role == "assistant":
227
- response_text = text
228
- break
229
- time.sleep(0.1)
230
- waited += 0.1
231
-
232
- if response_text is None:
233
- # If response generation is taking too long, provide immediate feedback
234
- response_text = "I'm thinking about that. One moment please."
235
 
236
- # Update chatbot with assistant message
237
- chatbot.append({"role": "assistant", "content": response_text})
238
- yield AdditionalOutputs(chatbot)
239
-
240
- # Stream TTS in smaller chunks for real-time experience
241
  try:
242
- chunks = optimize_tts(response_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
- for chunk in chunks:
245
- for audio_chunk in tts_model.stream_tts_sync(chunk):
246
- yield audio_chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  except Exception as e:
248
- print(f"TTS error: {e}")
 
 
 
 
 
 
 
 
 
 
 
249
 
250
 
251
  chatbot = gr.Chatbot(type="messages")
@@ -296,26 +195,19 @@ def _(webrtc_id: str):
296
  async for output in stream.output_stream(webrtc_id):
297
  chatbot = output.args[0]
298
  if chatbot and len(chatbot) > 0:
299
- # Only send the most recent message to avoid duplicates
300
  yield f"event: output\ndata: {json.dumps(chatbot[-1])}\n\n"
301
 
302
  return StreamingResponse(output_stream(), media_type="text/event-stream")
303
 
304
 
305
  if __name__ == "__main__":
306
- try:
307
- import torch
308
- print("Running with PyTorch")
309
- except ImportError:
310
- print("PyTorch not available, might affect performance")
311
-
312
  import os
313
- print("Starting server...")
314
-
315
  if (mode := os.getenv("MODE")) == "UI":
316
  stream.ui.launch(server_port=7860, server_name="0.0.0.0")
317
  elif mode == "PHONE":
318
  stream.fastphone(host="0.0.0.0", port=7860)
319
  else:
320
  import uvicorn
 
321
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  import json
2
  import os
3
  import time
 
 
4
  from pathlib import Path
 
5
 
6
  import gradio as gr
7
  import numpy as np
 
20
  from gradio.utils import get_space
21
  from groq import Groq
22
  from pydantic import BaseModel
23
+ from transformers import pipeline
24
 
25
  load_dotenv()
26
 
27
+ groq_client = Groq()
28
+ # Using a better but still small free model - distilGPT2 is more coherent than OPT-125M
29
+ text_generation = pipeline(
30
+ "text-generation",
31
+ model="distilgpt2", # Better free model that's still small
32
+ device_map="auto",
33
+ truncation=True
34
+ )
 
 
35
  tts_client = ElevenLabs(api_key=os.environ["ELEVENLABS_API_KEY"])
 
36
 
37
  curr_dir = Path(__file__).parent
38
 
39
+ tts_model = get_tts_model()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
+ # Keep track of last response to prevent repetition
42
+ last_response = ""
43
+
44
+ def clean_response(text):
45
+ """Clean the generated text to avoid repetition and improve quality"""
46
+ # Remove repeated phrases (simple approach)
47
+ sentences = text.split('. ')
48
+ cleaned_sentences = []
49
+ for s in sentences:
50
+ if s and s not in cleaned_sentences:
51
+ cleaned_sentences.append(s)
 
 
 
 
 
 
 
 
52
 
53
+ cleaned_text = '. '.join(cleaned_sentences)
54
+ if not cleaned_text.endswith('.'):
55
+ cleaned_text += '.'
56
 
57
+ # Limit length to avoid very long responses
58
+ if len(cleaned_text) > 200:
59
+ cleaned_text = cleaned_text[:197] + "..."
60
+
61
+ return cleaned_text
 
62
 
63
  def response(
64
  audio: tuple[int, np.ndarray],
65
  chatbot: list[dict] | None = None,
 
66
  ):
67
+ global last_response
 
 
68
  chatbot = chatbot or []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
 
 
 
 
 
70
  try:
71
+ prompt = groq_client.audio.transcriptions.create(
72
+ file=("audio-file.mp3", audio_to_bytes(audio)),
73
+ model="whisper-large-v3-turbo",
74
+ response_format="verbose_json",
75
+ ).text
76
+ chatbot.append({"role": "user", "content": prompt})
77
+ yield AdditionalOutputs(chatbot)
78
+
79
+ # Create a better prompt for the model
80
+ context = "You are a helpful assistant. Keep your responses short and to the point."
81
+ if chatbot and len(chatbot) > 1:
82
+ # Add the last exchange for context
83
+ context += f"\nPrevious: {chatbot[-2]['content']}\nYou: {prompt}"
84
+ else:
85
+ context += f"\nUser: {prompt}"
86
+
87
+ # Generate response using the Hugging Face model
88
+ generated_text = text_generation(
89
+ context,
90
+ max_length=150, # Longer context but still reasonable
91
+ num_return_sequences=1,
92
+ do_sample=True,
93
+ top_p=0.92,
94
+ temperature=0.7,
95
+ repetition_penalty=1.2, # Penalize repetition
96
+ )
97
+
98
+ # Extract only the new content
99
+ full_text = generated_text[0]['generated_text']
100
+ response_text = full_text.replace(context, "").strip()
101
+
102
+ # Clean up the response
103
+ response_text = clean_response(response_text)
104
+
105
+ # Prevent exact repetition from previous response
106
+ if response_text == last_response:
107
+ response_text = "I understand. Can you elaborate on that?"
108
+
109
+ last_response = response_text
110
 
111
+ # Add a fallback if the response is empty or too short
112
+ if len(response_text) < 10:
113
+ response_text = "I see. Could you tell me more about that?"
114
+
115
+ chatbot.append({"role": "assistant", "content": response_text})
116
+ yield AdditionalOutputs(chatbot) # Send chatbot update first
117
+
118
+ # Split the audio generation into smaller chunks to avoid repeating the entire message
119
+ sentences = response_text.split('. ')
120
+ start = time.time()
121
+
122
+ print("starting tts", start)
123
+ # Process each sentence separately for TTS
124
+ for i, sentence in enumerate(sentences):
125
+ if not sentence.strip():
126
+ continue
127
+
128
+ sentence = sentence.strip() + "."
129
+ for chunk in tts_model.stream_tts_sync(sentence):
130
+ print(f"chunk {i}.{chunk}", time.time() - start)
131
+ yield chunk
132
+
133
+ print("finished tts", time.time() - start)
134
+
135
  except Exception as e:
136
+ print(f"Error in response generation: {e}")
137
+ error_message = "Sorry, I encountered an error processing your request."
138
+ chatbot.append({"role": "assistant", "content": error_message})
139
+ yield AdditionalOutputs(chatbot)
140
+
141
+ # Still try to produce audio for the error message
142
+ try:
143
+ for chunk in tts_model.stream_tts_sync(error_message):
144
+ yield chunk
145
+ except Exception:
146
+ # If even TTS fails, just return with the error in chatbot
147
+ pass
148
 
149
 
150
  chatbot = gr.Chatbot(type="messages")
 
195
  async for output in stream.output_stream(webrtc_id):
196
  chatbot = output.args[0]
197
  if chatbot and len(chatbot) > 0:
 
198
  yield f"event: output\ndata: {json.dumps(chatbot[-1])}\n\n"
199
 
200
  return StreamingResponse(output_stream(), media_type="text/event-stream")
201
 
202
 
203
  if __name__ == "__main__":
 
 
 
 
 
 
204
  import os
205
+
 
206
  if (mode := os.getenv("MODE")) == "UI":
207
  stream.ui.launch(server_port=7860, server_name="0.0.0.0")
208
  elif mode == "PHONE":
209
  stream.fastphone(host="0.0.0.0", port=7860)
210
  else:
211
  import uvicorn
212
+
213
  uvicorn.run(app, host="0.0.0.0", port=7860)
index.html CHANGED
@@ -4,109 +4,168 @@
4
  <head>
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
- <title>RetroChat Audio</title>
8
  <style>
 
 
9
  body {
10
- font-family: monospace;
11
- background-color: #1a1a1a;
12
- color: #00ff00;
13
  margin: 0;
14
  padding: 20px;
15
  height: 100vh;
16
  box-sizing: border-box;
17
  }
 
18
  .container {
19
  display: flex;
20
  flex-direction: column;
21
  gap: 20px;
22
  height: calc(100% - 100px);
23
  margin-bottom: 20px;
 
 
24
  }
 
25
  .chat-container {
26
- border: 2px solid #00ff00;
 
27
  padding: 20px;
28
  display: flex;
29
  flex-direction: column;
30
  flex-grow: 1;
31
  box-sizing: border-box;
 
 
32
  }
 
33
  .controls-container {
34
- border: 2px solid #00ff00;
 
35
  padding: 20px;
36
  display: flex;
37
  align-items: center;
38
  gap: 20px;
39
  height: 128px;
40
  box-sizing: border-box;
 
 
41
  }
 
42
  .visualization-container {
43
  flex-grow: 1;
44
  display: flex;
45
  align-items: center;
46
  }
 
47
  .box-container {
48
  display: flex;
49
  justify-content: space-between;
50
  height: 64px;
51
  width: 100%;
52
  }
 
53
  .box {
54
  height: 100%;
55
  width: 8px;
56
- background: #00ff00;
57
- border-radius: 8px;
58
  transition: transform 0.05s ease;
59
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  .chat-messages {
61
  flex-grow: 1;
62
  overflow-y: auto;
63
  margin-bottom: 20px;
64
- padding: 10px;
65
- border: 1px solid #00ff00;
 
 
66
  }
 
67
  .message {
68
  margin-bottom: 10px;
69
- padding: 8px;
70
- border-radius: 4px;
 
 
 
71
  }
 
72
  .message.user {
73
- background-color: #003300;
 
 
74
  }
 
75
  .message.assistant {
76
- background-color: #002200;
 
 
 
77
  }
 
78
  button {
79
  height: 64px;
80
  min-width: 120px;
81
- background-color: #000;
82
- color: #00ff00;
83
- border: 2px solid #00ff00;
 
84
  padding: 10px 20px;
85
- font-family: monospace;
86
- font-size: 16px;
 
87
  cursor: pointer;
88
  transition: all 0.3s;
 
89
  }
 
90
  button:hover {
91
- border-width: 3px;
 
 
 
 
 
 
92
  }
 
93
  #audio-output {
94
  display: none;
95
  }
96
- /* Retro CRT effect */
97
- .crt-overlay {
 
98
  position: absolute;
99
- top: 0;
100
- left: 0;
101
- width: 100%;
102
- height: 100%;
103
- background: repeating-linear-gradient(0deg,
104
- rgba(0, 255, 0, 0.03),
105
- rgba(0, 255, 0, 0.03) 1px,
106
- transparent 1px,
107
- transparent 2px);
108
  pointer-events: none;
 
 
 
 
 
 
 
 
 
 
109
  }
 
110
  /* Add these new styles */
111
  .icon-with-spinner {
112
  display: flex;
@@ -115,20 +174,23 @@
115
  gap: 12px;
116
  min-width: 180px;
117
  }
 
118
  .spinner {
119
  width: 20px;
120
  height: 20px;
121
- border: 2px solid #00ff00;
122
  border-top-color: transparent;
123
  border-radius: 50%;
124
  animation: spin 1s linear infinite;
125
  flex-shrink: 0;
126
  }
 
127
  @keyframes spin {
128
  to {
129
  transform: rotate(360deg);
130
  }
131
  }
 
132
  .pulse-container {
133
  display: flex;
134
  align-items: center;
@@ -136,53 +198,66 @@
136
  gap: 12px;
137
  min-width: 180px;
138
  }
 
139
  .pulse-circle {
140
  width: 20px;
141
  height: 20px;
142
  border-radius: 50%;
143
- background-color: #00ff00;
144
- opacity: 0.2;
145
  flex-shrink: 0;
146
  transform: translateX(-0%) scale(var(--audio-level, 1));
147
  transition: transform 0.1s ease;
 
148
  }
 
149
  /* Add styles for typing indicator */
150
  .typing-indicator {
151
- padding: 8px;
152
- background-color: #002200;
153
- border-radius: 4px;
 
154
  margin-bottom: 10px;
155
  display: none;
 
156
  }
 
157
  .dots {
158
  display: inline-flex;
159
  gap: 4px;
160
  }
 
161
  .dot {
162
- width: 8px;
163
- height: 8px;
164
- background-color: #00ff00;
165
  border-radius: 50%;
166
- animation: pulse 1.5s infinite;
167
- opacity: 0.5;
 
 
 
168
  }
 
169
  .dot:nth-child(2) {
170
- animation-delay: 0.5s;
 
171
  }
 
172
  .dot:nth-child(3) {
173
- animation-delay: 1s;
 
174
  }
175
- @keyframes pulse {
176
- 0%,
177
- 100% {
178
- opacity: 0.5;
179
- transform: scale(1);
180
  }
181
  50% {
182
- opacity: 1;
183
- transform: scale(1.2);
184
  }
185
  }
 
186
  /* Add styles for toast notifications */
187
  .toast {
188
  position: fixed;
@@ -190,27 +265,58 @@
190
  left: 50%;
191
  transform: translateX(-50%);
192
  padding: 16px 24px;
193
- border-radius: 4px;
194
- font-size: 14px;
195
  z-index: 1000;
196
  display: none;
197
- box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
198
  }
 
199
  .toast.error {
200
- background-color: #f44336;
201
  color: white;
202
  }
 
203
  .toast.warning {
204
- background-color: #ffd700;
205
- color: black;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  }
207
  </style>
208
  </head>
209
 
210
  <body>
 
 
 
 
 
 
211
  <!-- Add toast element after body opening tag -->
212
  <div id="error-toast" class="toast"></div>
 
213
  <div class="container">
 
 
 
 
 
214
  <div class="chat-container">
215
  <div class="chat-messages" id="chat-messages"></div>
216
  <!-- Move typing indicator outside the chat messages -->
@@ -228,7 +334,7 @@
228
  <!-- Boxes will be dynamically added here -->
229
  </div>
230
  </div>
231
- <button id="start-button">Press to talk with me</button>
232
  </div>
233
  </div>
234
  <audio id="audio-output"></audio>
@@ -260,7 +366,7 @@
260
  </div>
261
  `;
262
  } else {
263
- startButton.innerHTML = 'Start';
264
  }
265
  }
266
  function showError(message) {
@@ -466,7 +572,7 @@
466
  updateButtonState();
467
  }
468
  startButton.addEventListener('click', () => {
469
- if (startButton.textContent === 'Start') {
470
  setupWebRTC();
471
  } else {
472
  stop();
 
4
  <head>
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>HappyChat Audio</title>
8
  <style>
9
+ @import url('https://fonts.googleapis.com/css2?family=Comic+Neue:wght@400;700&display=swap');
10
+
11
  body {
12
+ font-family: 'Comic Neue', cursive;
13
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
14
+ color: #333;
15
  margin: 0;
16
  padding: 20px;
17
  height: 100vh;
18
  box-sizing: border-box;
19
  }
20
+
21
  .container {
22
  display: flex;
23
  flex-direction: column;
24
  gap: 20px;
25
  height: calc(100% - 100px);
26
  margin-bottom: 20px;
27
+ max-width: 1200px;
28
+ margin: 0 auto;
29
  }
30
+
31
  .chat-container {
32
+ border: 3px solid #ff6b6b;
33
+ border-radius: 16px;
34
  padding: 20px;
35
  display: flex;
36
  flex-direction: column;
37
  flex-grow: 1;
38
  box-sizing: border-box;
39
+ background-color: #fff;
40
+ box-shadow: 0 8px 20px rgba(0, 0, 0, 0.1);
41
  }
42
+
43
  .controls-container {
44
+ border: 3px solid #4ecdc4;
45
+ border-radius: 16px;
46
  padding: 20px;
47
  display: flex;
48
  align-items: center;
49
  gap: 20px;
50
  height: 128px;
51
  box-sizing: border-box;
52
+ background-color: #fff;
53
+ box-shadow: 0 8px 20px rgba(0, 0, 0, 0.1);
54
  }
55
+
56
  .visualization-container {
57
  flex-grow: 1;
58
  display: flex;
59
  align-items: center;
60
  }
61
+
62
  .box-container {
63
  display: flex;
64
  justify-content: space-between;
65
  height: 64px;
66
  width: 100%;
67
  }
68
+
69
  .box {
70
  height: 100%;
71
  width: 8px;
72
+ background: #ff6b6b;
73
+ border-radius: 20px;
74
  transition: transform 0.05s ease;
75
  }
76
+
77
+ .box:nth-child(odd) {
78
+ background: #ffcc5c;
79
+ }
80
+
81
+ .box:nth-child(3n) {
82
+ background: #4ecdc4;
83
+ }
84
+
85
+ .box:nth-child(5n) {
86
+ background: #c16ecf;
87
+ }
88
+
89
  .chat-messages {
90
  flex-grow: 1;
91
  overflow-y: auto;
92
  margin-bottom: 20px;
93
+ padding: 15px;
94
+ border: 2px solid #ffcc5c;
95
+ border-radius: 12px;
96
+ background-color: #f9f9f9;
97
  }
98
+
99
  .message {
100
  margin-bottom: 10px;
101
+ padding: 12px;
102
+ border-radius: 18px;
103
+ font-size: 16px;
104
+ box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
105
+ max-width: 80%;
106
  }
107
+
108
  .message.user {
109
+ background-color: #ffcc5c;
110
+ margin-left: auto;
111
+ border-bottom-right-radius: 4px;
112
  }
113
+
114
  .message.assistant {
115
+ background-color: #4ecdc4;
116
+ color: white;
117
+ margin-right: auto;
118
+ border-bottom-left-radius: 4px;
119
  }
120
+
121
  button {
122
  height: 64px;
123
  min-width: 120px;
124
+ background-color: #ff6b6b;
125
+ color: white;
126
+ border: none;
127
+ border-radius: 32px;
128
  padding: 10px 20px;
129
+ font-family: 'Comic Neue', cursive;
130
+ font-size: 18px;
131
+ font-weight: bold;
132
  cursor: pointer;
133
  transition: all 0.3s;
134
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
135
  }
136
+
137
  button:hover {
138
+ transform: translateY(-3px);
139
+ box-shadow: 0 6px 12px rgba(0, 0, 0, 0.15);
140
+ background-color: #ff5252;
141
+ }
142
+
143
+ button:active {
144
+ transform: translateY(1px);
145
  }
146
+
147
  #audio-output {
148
  display: none;
149
  }
150
+
151
+ /* Floating bubbles animation */
152
+ .bubble {
153
  position: absolute;
154
+ border-radius: 50%;
155
+ opacity: 0.6;
 
 
 
 
 
 
 
156
  pointer-events: none;
157
+ animation: float 15s infinite ease-in-out;
158
+ }
159
+
160
+ @keyframes float {
161
+ 0%, 100% {
162
+ transform: translateY(0) rotate(0deg);
163
+ }
164
+ 50% {
165
+ transform: translateY(-80px) rotate(180deg);
166
+ }
167
  }
168
+
169
  /* Add these new styles */
170
  .icon-with-spinner {
171
  display: flex;
 
174
  gap: 12px;
175
  min-width: 180px;
176
  }
177
+
178
  .spinner {
179
  width: 20px;
180
  height: 20px;
181
+ border: 2px solid #fff;
182
  border-top-color: transparent;
183
  border-radius: 50%;
184
  animation: spin 1s linear infinite;
185
  flex-shrink: 0;
186
  }
187
+
188
  @keyframes spin {
189
  to {
190
  transform: rotate(360deg);
191
  }
192
  }
193
+
194
  .pulse-container {
195
  display: flex;
196
  align-items: center;
 
198
  gap: 12px;
199
  min-width: 180px;
200
  }
201
+
202
  .pulse-circle {
203
  width: 20px;
204
  height: 20px;
205
  border-radius: 50%;
206
+ background-color: #ffcc5c;
207
+ opacity: 0.8;
208
  flex-shrink: 0;
209
  transform: translateX(-0%) scale(var(--audio-level, 1));
210
  transition: transform 0.1s ease;
211
+ box-shadow: 0 0 10px #ffcc5c;
212
  }
213
+
214
  /* Add styles for typing indicator */
215
  .typing-indicator {
216
+ padding: 12px;
217
+ background-color: #f0f0f0;
218
+ border-radius: 18px;
219
+ border-bottom-left-radius: 4px;
220
  margin-bottom: 10px;
221
  display: none;
222
+ width: fit-content;
223
  }
224
+
225
  .dots {
226
  display: inline-flex;
227
  gap: 4px;
228
  }
229
+
230
  .dot {
231
+ width: 10px;
232
+ height: 10px;
233
+ background-color: #ff6b6b;
234
  border-radius: 50%;
235
+ animation: bounce 1.5s infinite;
236
+ }
237
+
238
+ .dot:nth-child(1) {
239
+ background-color: #ff6b6b;
240
  }
241
+
242
  .dot:nth-child(2) {
243
+ animation-delay: 0.2s;
244
+ background-color: #ffcc5c;
245
  }
246
+
247
  .dot:nth-child(3) {
248
+ animation-delay: 0.4s;
249
+ background-color: #4ecdc4;
250
  }
251
+
252
+ @keyframes bounce {
253
+ 0%, 100% {
254
+ transform: translateY(0);
 
255
  }
256
  50% {
257
+ transform: translateY(-10px);
 
258
  }
259
  }
260
+
261
  /* Add styles for toast notifications */
262
  .toast {
263
  position: fixed;
 
265
  left: 50%;
266
  transform: translateX(-50%);
267
  padding: 16px 24px;
268
+ border-radius: 12px;
269
+ font-size: 16px;
270
  z-index: 1000;
271
  display: none;
272
+ box-shadow: 0 8px 16px rgba(0, 0, 0, 0.2);
273
  }
274
+
275
  .toast.error {
276
+ background-color: #ff6b6b;
277
  color: white;
278
  }
279
+
280
  .toast.warning {
281
+ background-color: #ffcc5c;
282
+ color: #333;
283
+ }
284
+
285
+ .header {
286
+ text-align: center;
287
+ margin-bottom: 20px;
288
+ }
289
+
290
+ .header h1 {
291
+ color: #ff6b6b;
292
+ font-size: 36px;
293
+ margin-bottom: 8px;
294
+ text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.1);
295
+ }
296
+
297
+ .header p {
298
+ color: #666;
299
+ font-size: 18px;
300
  }
301
  </style>
302
  </head>
303
 
304
  <body>
305
+ <!-- Add animated bubbles -->
306
+ <div class="bubble" style="width: 60px; height: 60px; background: #ff6b6b; left: 10%; top: 20%;"></div>
307
+ <div class="bubble" style="width: 40px; height: 40px; background: #ffcc5c; right: 15%; top: 10%;"></div>
308
+ <div class="bubble" style="width: 80px; height: 80px; background: #4ecdc4; left: 20%; bottom: 10%;"></div>
309
+ <div class="bubble" style="width: 50px; height: 50px; background: #c16ecf; right: 20%; bottom: 20%;"></div>
310
+
311
  <!-- Add toast element after body opening tag -->
312
  <div id="error-toast" class="toast"></div>
313
+
314
  <div class="container">
315
+ <div class="header">
316
+ <h1>HappyChat</h1>
317
+ <p>Speak and listen to your friendly AI assistant!</p>
318
+ </div>
319
+
320
  <div class="chat-container">
321
  <div class="chat-messages" id="chat-messages"></div>
322
  <!-- Move typing indicator outside the chat messages -->
 
334
  <!-- Boxes will be dynamically added here -->
335
  </div>
336
  </div>
337
+ <button id="start-button">Start Chatting!</button>
338
  </div>
339
  </div>
340
  <audio id="audio-output"></audio>
 
366
  </div>
367
  `;
368
  } else {
369
+ startButton.innerHTML = 'Start Chatting!';
370
  }
371
  }
372
  function showError(message) {
 
572
  updateButtonState();
573
  }
574
  startButton.addEventListener('click', () => {
575
+ if (startButton.textContent === 'Start Chatting!') {
576
  setupWebRTC();
577
  } else {
578
  stop();