ROBO-R1984

Running on Zero

App Files Files Community

openfree committed on Jun 14

Commit

3f3dfbe

verified ·

1 Parent(s): 83f1de2

Update app.py

Browse files

Files changed (1) hide show

app.py +235 -67

app.py CHANGED Viewed

@@ -15,11 +15,14 @@ import torch
 import numpy as np
 from loguru import logger
 from PIL import Image
-from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer, WhisperProcessor, WhisperForConditionalGeneration
 import time
 import warnings
 from typing import Dict, List, Optional, Union
 import librosa
 # CSV/TXT 분석
 import pandas as pd
@@ -28,7 +31,7 @@ import PyPDF2
 warnings.filterwarnings('ignore')
-print("🎮 로봇 시각 시스템 초기화 (Gemma3-R1984-4B + Whisper)...")
 ##############################################################################
 # 상수 정의
@@ -44,7 +47,6 @@ SERPHOUSE_API_KEY = os.getenv("SERPHOUSE_API_KEY", "")
 model = None
 processor = None
 whisper_model = None
-whisper_processor = None
 model_loaded = False
 whisper_loaded = False
 model_name = "Gemma3-R1984-4B"
@@ -63,7 +65,7 @@ def clear_cuda_cache():
 ##############################################################################
 @spaces.GPU(duration=60)
 def load_whisper():
-    global whisper_model, whisper_processor, whisper_loaded
     if whisper_loaded:
         logger.info("Whisper 모델이 이미 로드되어 있습니다.")
@@ -71,11 +73,13 @@ def load_whisper():
     try:
         logger.info("Whisper 모델 로딩 시작...")
-        whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-base")
-        whisper_model = WhisperForConditionalGeneration.from_pretrained(
-            "openai/whisper-base",
-            device_map="auto",
-            torch_dtype=torch.float16
         )
         whisper_loaded = True
         logger.info("✅ Whisper 모델 로딩 완료!")
@@ -87,42 +91,141 @@ def load_whisper():
 ##############################################################################
 # 오디오 처리 함수
 ##############################################################################
 @spaces.GPU(duration=30)
-def transcribe_audio(audio_data):
     """Whisper를 사용한 오디오 전사"""
-    global whisper_model, whisper_processor
     if not whisper_loaded:
         if not load_whisper():
-            return "오디오 처리 불가"
     try:
-        if audio_data is None:
-            return None
-        # 오디오 데이터 처리
-        sample_rate, audio = audio_data
-        # 16kHz로 리샘플링
-        if sample_rate != 16000:
-            audio = librosa.resample(audio.astype(float), orig_sr=sample_rate, target_sr=16000)
-        # Whisper 입력 처리
-        inputs = whisper_processor(audio, sampling_rate=16000, return_tensors="pt")
-        inputs = {k: v.to(whisper_model.device) for k, v in inputs.items()}
         # 음성 인식
-        with torch.no_grad():
-            generated_ids = whisper_model.generate(**inputs, max_length=225)
-        # 디코딩
-        transcription = whisper_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        return transcription.strip()
     except Exception as e:
-        logger.error(f"오디오 전사 오류: {e}")
-        return f"오디오 인식 실패: {str(e)}"
 ##############################################################################
 # 키워드 추출 함수
@@ -591,13 +694,19 @@ css = """
     background: #e3f2fd;
     color: #1565c0;
 }
 """
 with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as demo:
     gr.HTML("""
     <div class="robot-header">
         <h1>🤖 로봇 시각 시스템</h1>
-        <h3>🎮 Gemma3-R1984-4B + 📷 실시간 웹캠 + 🎤 음성 인식</h3>
         <p>⚡ 멀티모달 AI로 로봇 작업 분석!</p>
     </div>
     """)
@@ -636,6 +745,11 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
                     '<div class="audio-status">🎤 음성 인식: 비활성화</div>'
                 )
                 # 마지막 인식된 텍스트
                 last_transcript = gr.Textbox(
                     label="인식된 음성",
@@ -657,9 +771,9 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
                 )
                 use_audio_toggle = gr.Checkbox(
-                    label="🎤 음성 인식 사용",
                     value=False,
-                    info="주변 소리를 인식하여 분석에 포함"
                 )
             with gr.Row():
@@ -719,6 +833,19 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
         label="마이크 입력"
     )
     # 문서 분석 탭 (숨김)
     with gr.Tab("📄 문서 분석", visible=False):
         with gr.Row():
@@ -752,8 +879,6 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
     # 이벤트 핸들러
     webcam_state = gr.State(None)
-    audio_state = gr.State(None)
-    transcript_state = gr.State("")
     def capture_webcam(frame):
         """웹캠 프레임 캡처"""
@@ -763,15 +888,29 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
     def clear_capture():
         """캡처 초기화"""
         return None, gr.update(visible=False), '<div class="status-box" style="background:#d4edda; color:#155724;">🎮 시스템 준비</div>', ""
-    def analyze_with_task(image, prompt, task_type, use_search, thinking, tokens, transcript):
         """특정 태스크로 이미지 분석"""
         if image is None:
             return "❌ 먼저 이미지를 캡처하세요.", '<div class="status-box" style="background:#f8d7da; color:#721c24;">❌ 이미지 없음</div>'
         status = f'<div class="status-box" style="background:#cce5ff; color:#004085;">🚀 {task_type} 분석 중...</div>'
         result = analyze_image_for_robot(
             image=image,
             prompt=prompt,
@@ -800,27 +939,31 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
         return formatted_result, complete_status
     # 자동 캡처 및 분석 함수
-    def auto_capture_and_analyze(webcam_frame, audio_data, task_prompt, use_search, thinking, tokens, use_audio, current_transcript):
-        """자동 캡처 및 분석 (오디오 포함)"""
         if webcam_frame is None:
             return (
                 None,
                 "자동 캡처 대기 중...",
                 '<div class="status-box" style="background:#fff3cd; color:#856404;">⏳ 웹캠 대기 중</div>',
                 '<div class="auto-capture-status">🔄 자동 캡처: 웹캠 대기 중</div>',
-                current_transcript,
-                current_transcript
             )
         # 캡처 수행
         timestamp = time.strftime("%H:%M:%S")
-        # 오디오 처리 (활성화된 경우)
-        new_transcript = ""
-        if use_audio and audio_data is not None:
-            transcribed = transcribe_audio(audio_data)
-            if transcribed and transcribed != "오디오 처리 불가":
-                new_transcript = transcribed
         # 이미지 분석 (작업 계획 모드로)
         result = analyze_image_for_robot(
@@ -830,7 +973,7 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
             use_web_search=use_search,
             enable_thinking=thinking,
             max_new_tokens=tokens,
-            audio_transcript=new_transcript if new_transcript else None
         )
         formatted_result = f"""🔄 자동 분석 완료 ({timestamp})
@@ -838,13 +981,15 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
 {result}
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"""
         return (
             webcam_frame,
             formatted_result,
             '<div class="status-box" style="background:#d4edda; color:#155724;">✅ 자동 분석 완료</div>',
             f'<div class="auto-capture-status">🔄 자동 캡처: 마지막 분석 {timestamp}</div>',
-            new_transcript if new_transcript else current_transcript,
-            new_transcript if new_transcript else current_transcript
         )
     # 웹캠 스트리밍
@@ -854,14 +999,17 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
         outputs=[webcam_state]
     )
-    # 오디오 스트리밍
-    def process_audio_stream(audio_data):
-        return audio_data
     audio_input.stream(
-        fn=process_audio_stream,
         inputs=[audio_input],
-        outputs=[audio_state]
     )
     # 수동 캡처 버튼
@@ -874,19 +1022,19 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
     # 초기화 버튼
     clear_capture_btn.click(
         fn=clear_capture,
-        outputs=[webcam_state, captured_image, status_display, transcript_state]
     )
     # 작업 버튼들
     planning_btn.click(
-        fn=lambda img, p, s, t, tk, tr: analyze_with_task(img, p, "planning", s, t, tk, tr),
-        inputs=[captured_image, task_prompt, use_web_search, enable_thinking, max_tokens, transcript_state],
         outputs=[result_output, status_display]
     )
     grounding_btn.click(
-        fn=lambda img, p, s, t, tk, tr: analyze_with_task(img, p, "grounding", s, t, tk, tr),
-        inputs=[captured_image, task_prompt, use_web_search, enable_thinking, max_tokens, transcript_state],
         outputs=[result_output, status_display]
     )
@@ -924,14 +1072,29 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
     # 오디오 토글 이벤트
     def toggle_audio(enabled):
         if enabled:
             # Whisper 모델 로드
             load_whisper()
             return (
                 gr.update(visible=True),  # audio_input 표시
-                '<div class="audio-status">🎤 음성 인식: 활성화됨</div>'
             )
         else:
             return (
                 gr.update(visible=False),  # audio_input 숨김
                 '<div class="audio-status">🎤 음성 인식: 비활성화</div>'
@@ -946,13 +1109,18 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
     # 타이머 틱 이벤트
     timer.tick(
         fn=auto_capture_and_analyze,
-        inputs=[webcam_state, audio_state, task_prompt, use_web_search, enable_thinking, max_tokens, use_audio_toggle, transcript_state],
-        outputs=[captured_image, result_output, status_display, auto_capture_status, transcript_state, last_transcript]
     )
     # 초기 모델 로드
     def initial_load():
         load_model()
         return "시스템 준비 완료! 🚀"
     demo.load(
@@ -961,8 +1129,8 @@ with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as dem
     )
 if __name__ == "__main__":
-    print("🚀 로봇 시각 시스템 시작 (Gemma3-R1984-4B + Whisper)...")
-    demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
         share=False,

 import numpy as np
 from loguru import logger
 from PIL import Image
+from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer, pipeline
 import time
 import warnings
 from typing import Dict, List, Optional, Union
 import librosa
+import scipy.signal as sps
+from threading import Thread, Lock
+import queue
 # CSV/TXT 분석
 import pandas as pd
 warnings.filterwarnings('ignore')
+print("🎮 로봇 시각 시스템 초기화 (Gemma3-R1984-4B + Whisper + 10초 교대 녹음)...")
 ##############################################################################
 # 상수 정의
 model = None
 processor = None
 whisper_model = None
 model_loaded = False
 whisper_loaded = False
 model_name = "Gemma3-R1984-4B"
 ##############################################################################
 @spaces.GPU(duration=60)
 def load_whisper():
+    global whisper_model, whisper_loaded
     if whisper_loaded:
         logger.info("Whisper 모델이 이미 로드되어 있습니다.")
     try:
         logger.info("Whisper 모델 로딩 시작...")
+        # 파이프라인 방식으로 로드
+        device = 0 if torch.cuda.is_available() else "cpu"
+        whisper_model = pipeline(
+            task="automatic-speech-recognition",
+            model="openai/whisper-base",
+            chunk_length_s=30,
+            device=device,
         )
         whisper_loaded = True
         logger.info("✅ Whisper 모델 로딩 완료!")
 ##############################################################################
 # 오디오 처리 함수
 ##############################################################################
+import scipy.signal as sps
+from threading import Thread, Lock
+import queue
+# 오디오 버퍼 관리
+audio_buffer_lock = Lock()
+audio_buffer_a = []
+audio_buffer_b = []
+current_buffer = 'a'  # 현재 녹음 중인 버퍼
+processing_queue = queue.Queue()  # 처리 대기 큐
+last_transcription = ""  # 마지막 전사 결과
+def resample_audio(audio: np.ndarray, orig_sr: int, target_sr: int = 16000) -> np.ndarray:
+    """오디오 리샘플링"""
+    if orig_sr == target_sr:
+        return audio.astype(np.float32)
+    # scipy를 사용한 리샘플링
+    number_of_samples = round(len(audio) * float(target_sr) / orig_sr)
+    audio_resampled = sps.resample(audio, number_of_samples)
+    return audio_resampled.astype(np.float32)
 @spaces.GPU(duration=30)
+def transcribe_audio_whisper(audio_array: np.ndarray, sr: int = 16000):
     """Whisper를 사용한 오디오 전사"""
+    global whisper_model, whisper_loaded
     if not whisper_loaded:
         if not load_whisper():
+            return None
     try:
         # 음성 인식
+        result = whisper_model({"array": audio_array, "sampling_rate": sr})
+        transcription = result["text"].strip()
+        return transcription if transcription else None
+    except Exception as e:
+        logger.error(f"Whisper 오디오 전사 오류: {e}")
+        return None
+def accumulate_audio(audio_chunk):
+    """오디오 청크를 버퍼에 누적"""
+    global current_buffer, audio_buffer_a, audio_buffer_b
+    if audio_chunk is None:
+        return
+    sr, audio = audio_chunk
+    # 스테레오를 모노로 변환
+    if audio.ndim > 1:
+        audio = audio.mean(axis=1)
+    with audio_buffer_lock:
+        if current_buffer == 'a':
+            audio_buffer_a.append((audio, sr))
+        else:
+            audio_buffer_b.append((audio, sr))
+def switch_buffers():
+    """버퍼 전환 및 처리 큐에 추가"""
+    global current_buffer, audio_buffer_a, audio_buffer_b
+    with audio_buffer_lock:
+        if current_buffer == 'a':
+            # A 버퍼를 처리 큐에 추가
+            if audio_buffer_a:
+                processing_queue.put(('a', audio_buffer_a.copy()))
+                audio_buffer_a.clear()
+            current_buffer = 'b'
+        else:
+            # B 버퍼를 처리 큐에 추가
+            if audio_buffer_b:
+                processing_queue.put(('b', audio_buffer_b.copy()))
+                audio_buffer_b.clear()
+            current_buffer = 'a'
+def process_audio_buffer(buffer_data):
+    """버퍼의 오디오 데이터 처리"""
+    buffer_name, audio_chunks = buffer_data
+    if not audio_chunks:
+        return None
+    try:
+        # 모든 청크를 하나로 결합
+        combined_audio = []
+        sample_rate = 16000
+        for audio, sr in audio_chunks:
+            # 16kHz로 리샘플링
+            if sr != 16000:
+                audio = resample_audio(audio, sr, 16000)
+            combined_audio.append(audio)
+        # 결합
+        if combined_audio:
+            full_audio = np.concatenate(combined_audio)
+            # Whisper로 전사
+            transcription = transcribe_audio_whisper(full_audio, 16000)
+            if transcription:
+                logger.info(f"버퍼 {buffer_name} 전사 완료: {transcription[:50]}...")
+                return transcription
     except Exception as e:
+        logger.error(f"오디오 버퍼 처리 오류: {e}")
+    return None
+# 백그라운드 처리 스레드
+def audio_processing_worker():
+    """백그라운드에서 오디오 버퍼 처리"""
+    global last_transcription
+    while True:
+        try:
+            # 처리할 버퍼 가져오기
+            buffer_data = processing_queue.get(timeout=1)
+            # 오디오 처리
+            result = process_audio_buffer(buffer_data)
+            if result:
+                # 결과를 전역 변수에 저장 (나중에 사용)
+                with audio_buffer_lock:
+                    last_transcription = result
+        except queue.Empty:
+            continue
+        except Exception as e:
+            logger.error(f"오디오 처리 워커 오류: {e}")
 ##############################################################################
 # 키워드 추출 함수
     background: #e3f2fd;
     color: #1565c0;
 }
+.buffer-info {
+    font-size: 0.9em;
+    color: #666;
+    text-align: center;
+    margin-top: 5px;
+}
 """
 with gr.Blocks(title="🤖 로봇 시각 시스템 (Gemma3-4B)", css=css) as demo:
     gr.HTML("""
     <div class="robot-header">
         <h1>🤖 로봇 시각 시스템</h1>
+        <h3>🎮 Gemma3-R1984-4B + 📷 실시간 웹캠 + 🎤 10초 교대 음성 인식</h3>
         <p>⚡ 멀티모달 AI로 로봇 작업 분석!</p>
     </div>
     """)
                     '<div class="audio-status">🎤 음성 인식: 비활성화</div>'
                 )
+                # 버퍼 정보
+                gr.HTML(
+                    '<div class="buffer-info">A/B 버퍼 교대 녹음으로 끊김 없는 인식</div>'
+                )
                 # 마지막 인식된 텍스트
                 last_transcript = gr.Textbox(
                     label="인식된 음성",
                 )
                 use_audio_toggle = gr.Checkbox(
+                    label="🎤 음성 인식 사용 (10초 교대 녹음)",
                     value=False,
+                    info="10초마다 교대로 녹음하여 끊김 없이 인식"
                 )
             with gr.Row():
         label="마이크 입력"
     )
+    # 오디오 스트리밍 처리
+    def audio_stream_callback(audio_chunk):
+        """오디오 스트림 콜백 - 버퍼에 누적"""
+        accumulate_audio(audio_chunk)
+        return None  # 상태 업데이트 없음
+    # 오디오 스트리밍 연결
+    audio_input.stream(
+        fn=audio_stream_callback,
+        inputs=[audio_input],
+        outputs=None
+    )
     # 문서 분석 탭 (숨김)
     with gr.Tab("📄 문서 분석", visible=False):
         with gr.Row():
     # 이벤트 핸들러
     webcam_state = gr.State(None)
     def capture_webcam(frame):
         """웹캠 프레임 캡처"""
     def clear_capture():
         """캡처 초기화"""
+        global last_transcription, audio_buffer_a, audio_buffer_b
+        with audio_buffer_lock:
+            last_transcription = ""
+            audio_buffer_a.clear()
+            audio_buffer_b.clear()
         return None, gr.update(visible=False), '<div class="status-box" style="background:#d4edda; color:#155724;">🎮 시스템 준비</div>', ""
+    def analyze_with_task(image, prompt, task_type, use_search, thinking, tokens):
         """특정 태스크로 이미지 분석"""
+        global last_transcription
         if image is None:
             return "❌ 먼저 이미지를 캡처하세요.", '<div class="status-box" style="background:#f8d7da; color:#721c24;">❌ 이미지 없음</div>'
         status = f'<div class="status-box" style="background:#cce5ff; color:#004085;">🚀 {task_type} 분석 중...</div>'
+        # 현재 전사 텍스트 가져오기
+        transcript = ""
+        with audio_buffer_lock:
+            transcript = last_transcription
         result = analyze_image_for_robot(
             image=image,
             prompt=prompt,
         return formatted_result, complete_status
     # 자동 캡처 및 분석 함수
+    def auto_capture_and_analyze(webcam_frame, task_prompt, use_search, thinking, tokens, use_audio):
+        """자동 캡처 및 분석 (10초마다 오디오 버퍼 전환)"""
+        global last_transcription
         if webcam_frame is None:
             return (
                 None,
                 "자동 캡처 대기 중...",
                 '<div class="status-box" style="background:#fff3cd; color:#856404;">⏳ 웹캠 대기 중</div>',
                 '<div class="auto-capture-status">🔄 자동 캡처: 웹캠 대기 중</div>',
+                ""
             )
         # 캡처 수행
         timestamp = time.strftime("%H:%M:%S")
+        # 버퍼 전환 (10초마다)
+        if use_audio:
+            switch_buffers()
+        # 마지막 전사 결과 가져오기
+        audio_transcript = ""
+        if use_audio:
+            with audio_buffer_lock:
+                audio_transcript = last_transcription
         # 이미지 분석 (작업 계획 모드로)
         result = analyze_image_for_robot(
             use_web_search=use_search,
             enable_thinking=thinking,
             max_new_tokens=tokens,
+            audio_transcript=audio_transcript if audio_transcript else None
         )
         formatted_result = f"""🔄 자동 분석 완료 ({timestamp})
 {result}
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"""
+        # 마지막 인식된 텍스트 업데이트
+        transcript_display = audio_transcript if audio_transcript else "음성 인식 대기 중..."
         return (
             webcam_frame,
             formatted_result,
             '<div class="status-box" style="background:#d4edda; color:#155724;">✅ 자동 분석 완료</div>',
             f'<div class="auto-capture-status">🔄 자동 캡처: 마지막 분석 {timestamp}</div>',
+            transcript_display
         )
     # 웹캠 스트리밍
         outputs=[webcam_state]
     )
+    # 오디오 스트리밍 처리
+    def audio_stream_callback(audio_chunk):
+        """오디오 스트림 콜백 - 버퍼에 누적"""
+        accumulate_audio(audio_chunk)
+        return None  # 상태 업데이트 없음
+    # 오디오 스트리밍 연결
     audio_input.stream(
+        fn=audio_stream_callback,
         inputs=[audio_input],
+        outputs=None
     )
     # 수동 캡처 버튼
     # 초기화 버튼
     clear_capture_btn.click(
         fn=clear_capture,
+        outputs=[webcam_state, captured_image, status_display, last_transcript]
     )
     # 작업 버튼들
     planning_btn.click(
+        fn=lambda img, p, s, t, tk: analyze_with_task(img, p, "planning", s, t, tk),
+        inputs=[captured_image, task_prompt, use_web_search, enable_thinking, max_tokens],
         outputs=[result_output, status_display]
     )
     grounding_btn.click(
+        fn=lambda img, p, s, t, tk: analyze_with_task(img, p, "grounding", s, t, tk),
+        inputs=[captured_image, task_prompt, use_web_search, enable_thinking, max_tokens],
         outputs=[result_output, status_display]
     )
     # 오디오 토글 이벤트
     def toggle_audio(enabled):
+        global audio_buffer_a, audio_buffer_b, current_buffer, last_transcription
         if enabled:
             # Whisper 모델 로드
             load_whisper()
+            # 버퍼 초기화
+            with audio_buffer_lock:
+                audio_buffer_a.clear()
+                audio_buffer_b.clear()
+                current_buffer = 'a'
+                last_transcription = ""
             return (
                 gr.update(visible=True),  # audio_input 표시
+                '<div class="audio-status">🎤 음성 인식: 활성화됨 (10초 교대 녹음)</div>'
             )
         else:
+            # 버퍼 초기화
+            with audio_buffer_lock:
+                audio_buffer_a.clear()
+                audio_buffer_b.clear()
+                last_transcription = ""
             return (
                 gr.update(visible=False),  # audio_input 숨김
                 '<div class="audio-status">🎤 음성 인식: 비활성화</div>'
     # 타이머 틱 이벤트
     timer.tick(
         fn=auto_capture_and_analyze,
+        inputs=[webcam_state, task_prompt, use_web_search, enable_thinking, max_tokens, use_audio_toggle],
+        outputs=[captured_image, result_output, status_display, auto_capture_status, last_transcript]
     )
     # 초기 모델 로드
     def initial_load():
         load_model()
+        # 오디오 워커 스레드 시작
+        audio_worker_thread = Thread(target=audio_processing_worker, daemon=True)
+        audio_worker_thread.start()
         return "시스템 준비 완료! 🚀"
     demo.load(
     )
 if __name__ == "__main__":
+    print("🚀 로봇 시각 시스템 시작 (Gemma3-R1984-4B + Whisper 10초 교대 녹음)...")
+    demo.queue().launch(
         server_name="0.0.0.0",
         server_port=7860,
         share=False,