IAMTFRMZA committed on
Commit
e0cd5da
·
verified ·
1 Parent(s): d13b654

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -16
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # top of the file
2
  import gradio as gr
3
  import os, time, re, json, base64, asyncio, threading, uuid, io
4
  import numpy as np
@@ -130,11 +129,27 @@ def handle_chat(user_input, history, thread_id, image_url):
130
  with gr.Blocks(theme=gr.themes.Soft()) as app:
131
  gr.Markdown("# 📄 Document AI Assistant")
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  chat_state = gr.State([])
134
  thread_state = gr.State()
135
  image_state = gr.State()
136
  client_id = gr.State()
137
- voice_enabled = gr.State(False)
138
 
139
  with gr.Row(equal_height=True):
140
  with gr.Column(scale=1):
@@ -144,27 +159,52 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
144
  chat = gr.Chatbot(label="💬 Chat", height=460)
145
 
146
  with gr.Row():
147
- user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=6)
148
- mic_toggle_btn = gr.Button("🎙️", scale=1)
149
  send_btn = gr.Button("Send", variant="primary", scale=2)
150
 
151
- with gr.Accordion("🎤 Voice Transcription", open=False) as voice_section:
152
- with gr.Row():
153
- voice_input = gr.Audio(label="Mic", streaming=True)
154
- voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
155
- clear_btn = gr.Button("🧹 Clear Transcript")
 
156
 
157
- # Functional bindings
158
- def toggle_voice(curr):
159
- return not curr, gr.update(visible=not curr)
160
 
161
- mic_toggle_btn.click(fn=toggle_voice, inputs=voice_enabled, outputs=[voice_enabled, voice_section])
162
  send_btn.click(fn=handle_chat,
163
  inputs=[user_prompt, chat_state, thread_state, image_state],
164
  outputs=[user_prompt, chat, thread_state, image_state])
 
165
  image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
166
- voice_input.stream(fn=send_audio, inputs=[voice_input, client_id], outputs=voice_transcript, stream_every=0.5)
167
- clear_btn.click(fn=clear_transcript, inputs=[client_id], outputs=voice_transcript)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  app.load(fn=create_ws, outputs=[client_id])
169
 
170
- app.launch()
 
 
1
  import gradio as gr
2
  import os, time, re, json, base64, asyncio, threading, uuid, io
3
  import numpy as np
 
129
  with gr.Blocks(theme=gr.themes.Soft()) as app:
130
  gr.Markdown("# 📄 Document AI Assistant")
131
 
132
+ gr.HTML("""
133
+ <style>
134
+ .big-btn {
135
+ font-size: 18px !important;
136
+ padding: 14px 28px !important;
137
+ border-radius: 8px !important;
138
+ width: 100% !important;
139
+ margin-top: 10px;
140
+ }
141
+ .voice-area {
142
+ padding-top: 12px;
143
+ border-top: 1px solid #444;
144
+ margin-top: 12px;
145
+ }
146
+ </style>
147
+ """)
148
+
149
  chat_state = gr.State([])
150
  thread_state = gr.State()
151
  image_state = gr.State()
152
  client_id = gr.State()
 
153
 
154
  with gr.Row(equal_height=True):
155
  with gr.Column(scale=1):
 
159
  chat = gr.Chatbot(label="💬 Chat", height=460)
160
 
161
  with gr.Row():
162
+ user_prompt = gr.Textbox(placeholder="Ask your question...", show_label=False, scale=8)
 
163
  send_btn = gr.Button("Send", variant="primary", scale=2)
164
 
165
+ # === Voice Transcription Section ===
166
+ with gr.Column(elem_classes="voice-area"):
167
+ gr.Markdown("### 🎙️ Voice Input")
168
+
169
+ voice_input = gr.Audio(label="Tap to Record", streaming=True, type="numpy", show_label=True)
170
+ voice_transcript = gr.Textbox(label="Transcript", lines=2, interactive=False)
171
 
172
+ with gr.Row():
173
+ voice_send_btn = gr.Button("🟢 Send Voice to Assistant", elem_classes="big-btn")
174
+ voice_clear_btn = gr.Button("🧹 Clear", elem_classes="big-btn")
175
 
176
+ # ============ Functional Bindings ============
177
  send_btn.click(fn=handle_chat,
178
  inputs=[user_prompt, chat_state, thread_state, image_state],
179
  outputs=[user_prompt, chat, thread_state, image_state])
180
+
181
  image_state.change(fn=lambda x: x, inputs=image_state, outputs=image_display)
182
+
183
+ voice_input.stream(fn=send_audio,
184
+ inputs=[voice_input, client_id],
185
+ outputs=voice_transcript,
186
+ stream_every=0.5)
187
+
188
def feed_voice_to_assistant(transcript, history, thread_id, image_url, cid):
    """Forward the accumulated voice transcript to the assistant as a chat turn.

    Parameters mirror handle_chat, plus `cid`, the websocket client id used to
    look up this client's live connection. Returns the same 4-tuple shape as
    handle_chat (prompt value, chat history, thread id, image url); when the
    transcript is empty the UI is left untouched via gr.update().
    """
    # None-safe guard: a gr.Textbox value can be None, and the original
    # `transcript.strip()` would raise AttributeError in that case.
    if not transcript or not transcript.strip():
        return gr.update(), history, thread_id, image_url
    # Reset the server-side transcript buffer so the next recording starts clean.
    if cid in connections:
        connections[cid].transcript = ""
    return handle_chat(transcript, history, thread_id, image_url)
194
+
195
def clear_all(cid):
    """Reset the session: empty chat history, transcript, thread, and image.

    Also wipes the per-connection transcript buffer for `cid` when that
    client still has an active websocket connection.
    """
    # EAFP: clear the live transcript buffer if this client is connected.
    try:
        connections[cid].transcript = ""
    except KeyError:
        pass
    return [], "", None, None
199
+
200
+ voice_send_btn.click(fn=feed_voice_to_assistant,
201
+ inputs=[voice_transcript, chat_state, thread_state, image_state, client_id],
202
+ outputs=[user_prompt, chat, thread_state, image_state])
203
+
204
+ voice_clear_btn.click(fn=clear_all,
205
+ inputs=[client_id],
206
+ outputs=[chat, voice_transcript, thread_state, image_state])
207
+
208
  app.load(fn=create_ws, outputs=[client_id])
209
 
210
+ app.launch()