IAMTFRMZA committed on
Commit
e0cd5da
·
verified ·
1 Parent(s): d13b654

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -16
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # top of the file
2
  import gradio as gr
3
  import os, time, re, json, base64, asyncio, threading, uuid, io
4
  import numpy as np
@@ -130,11 +129,27 @@ def handle_chat(user_input, history, thread_id, image_url):
130
  with gr.Blocks(theme=gr.themes.Soft()) as app:
131
  gr.Markdown("# 📄 Document AI Assistant")
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  chat_state = gr.State([])
134
  thread_state = gr.State()
135
  image_state = gr.State()
136
  client_id = gr.State()
137
- voice_enabled = gr.State(False)
138
 
139
  with gr.Row(equal_height=True):
140
  with gr.Column(scale=1):
@@ -144,27 +159,52 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
144
  chat = gr.Chatbot(label="💬 Chat", height=460)
145
 
146
  with gr.Row():
147
- user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=6)
148
- mic_toggle_btn = gr.Button("🎙️", scale=1)
149
  send_btn = gr.Button("Send", variant="primary", scale=2)
150
 
151
- with gr.Accordion("🎤 Voice Transcription", open=False) as voice_section:
152
- with gr.Row():
153
- voice_input = gr.Audio(label="Mic", streaming=True)
154
- voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
155
- clear_btn = gr.Button("🧹 Clear Transcript")
 
156
 
157
- # Functional bindings
158
- def toggle_voice(curr):
159
- return not curr, gr.update(visible=not curr)
160
 
161
- mic_toggle_btn.click(fn=toggle_voice, inputs=voice_enabled, outputs=[voice_enabled, voice_section])
162
  send_btn.click(fn=handle_chat,
163
  inputs=[user_prompt, chat_state, thread_state, image_state],
164
  outputs=[user_prompt, chat, thread_state, image_state])
 
165
  image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
166
- voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
167
- clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=voice_transcript)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  app.load(fn=create_ws, outputs=[client_id])
169
 
170
- app.launch()
 
 
1
  import gradio as gr
2
  import os, time, re, json, base64, asyncio, threading, uuid, io
3
  import numpy as np
 
129
  with gr.Blocks(theme=gr.themes.Soft()) as app:
130
  gr.Markdown("# 📄 Document AI Assistant")
131
 
132
+ gr.HTML("""
133
+ <style>
134
+ .big-btn {
135
+ font-size: 18px !important;
136
+ padding: 14px 28px !important;
137
+ border-radius: 8px !important;
138
+ width: 100% !important;
139
+ margin-top: 10px;
140
+ }
141
+ .voice-area {
142
+ padding-top: 12px;
143
+ border-top: 1px solid #444;
144
+ margin-top: 12px;
145
+ }
146
+ </style>
147
+ """)
148
+
149
  chat_state = gr.State([])
150
  thread_state = gr.State()
151
  image_state = gr.State()
152
  client_id = gr.State()
 
153
 
154
  with gr.Row(equal_height=True):
155
  with gr.Column(scale=1):
 
159
  chat = gr.Chatbot(label="💬 Chat", height=460)
160
 
161
  with gr.Row():
162
+ user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=8)
 
163
  send_btn = gr.Button("Send", variant="primary", scale=2)
164
 
165
+ # === Voice Transcription Section ===
166
+ with gr.Column(elem_classes="voice-area"):
167
+ gr.Markdown("### 🎙️ Voice Input")
168
+
169
+ voice_input = gr.Audio(label="Tap to Record", streaming=True, type="numpy", show_label=True)
170
+ voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
171
 
172
+ with gr.Row():
173
+ voice_send_btn = gr.Button("🟢 Send Voice to Assistant", elem_classes="big-btn")
174
+ voice_clear_btn = gr.Button("🧹 Clear", elem_classes="big-btn")
175
 
176
+ # ============ Functional Bindings ============
177
  send_btn.click(fn=handle_chat,
178
  inputs=[user_prompt, chat_state, thread_state, image_state],
179
  outputs=[user_prompt, chat, thread_state, image_state])
180
+
181
  image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
182
+
183
+ voice_input.stream(fn=send_audio,
184
+ inputs=[voice_input, client_id],
185
+ outputs=voice_transcript,
186
+ stream_every=0.5)
187
+
188
def feed_voice_to_assistant(transcript, history, thread_id, image_url, cid):
    """Forward the accumulated voice transcript to the assistant as a chat turn.

    Parameters mirror handle_chat, plus `cid`, the websocket client id used to
    look up this client's live connection. Returns the same 4-tuple shape as
    handle_chat (prompt value, chat history, thread id, image url); when the
    transcript is empty the UI is left untouched via gr.update().
    """
    # None-safe guard: a gr.Textbox value can be None, and the original
    # `transcript.strip()` would raise AttributeError in that case.
    if not transcript or not transcript.strip():
        return gr.update(), history, thread_id, image_url
    # Reset the server-side transcript buffer so the next recording starts clean.
    if cid in connections:
        connections[cid].transcript = ""
    return handle_chat(transcript, history, thread_id, image_url)
194
+
195
def clear_all(cid):
    """Reset the session: empty chat history, transcript, thread, and image.

    Also wipes the per-connection transcript buffer for `cid` when that
    client still has an active websocket connection.
    """
    # EAFP: clear the live transcript buffer if this client is connected.
    try:
        connections[cid].transcript = ""
    except KeyError:
        pass
    return [], "", None, None
199
+
200
+ voice_send_btn.click(fn=feed_voice_to_assistant,
201
+ inputs=[voice_transcript, chat_state, thread_state, image_state, client_id],
202
+ outputs=[user_prompt, chat, thread_state, image_state])
203
+
204
+ voice_clear_btn.click(fn=clear_all,
205
+ inputs=[client_id],
206
+ outputs=[chat, voice_transcript, thread_state, image_state])
207
+
208
  app.load(fn=create_ws, outputs=[client_id])
209
 
210
+ app.launch()