Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import gradio as gr
|
|
5 |
import asyncio
|
6 |
import logging
|
7 |
import subprocess
|
|
|
8 |
from serpapi import GoogleSearch
|
9 |
from pydantic import BaseModel
|
10 |
from autogen_agentchat.agents import AssistantAgent
|
@@ -20,7 +21,7 @@ import traceback
|
|
20 |
import soundfile as sf
|
21 |
import tempfile
|
22 |
from pydub import AudioSegment
|
23 |
-
|
24 |
# Set up logging
|
25 |
logging.basicConfig(
|
26 |
level=logging.DEBUG,
|
@@ -172,6 +173,41 @@ async def validate_and_convert_speaker_audio(speaker_audio):
|
|
172 |
logger.error("Failed to validate or convert speaker audio %s: %s", speaker_audio, str(e))
|
173 |
return None
|
174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
# Helper function to generate audio using XTTS-v2 CLI
|
176 |
def generate_xtts_audio(text, speaker_wav, output_path):
|
177 |
try:
|
@@ -578,6 +614,16 @@ Example for 1 content slide:
|
|
578 |
"""
|
579 |
return
|
580 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
581 |
# Verify XTTS-v2 model files
|
582 |
required_files = [
|
583 |
os.path.join(XTTS_MODEL_DIR, "model_se.pth.tar"),
|
@@ -589,7 +635,7 @@ Example for 1 content slide:
|
|
589 |
yield f"""
|
590 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
591 |
<h2 style="color: #d9534f;">Missing XTTS-v2 model files</h2>
|
592 |
-
<p style="margin-top: 20px;">
|
593 |
</div>
|
594 |
"""
|
595 |
return
|
@@ -776,6 +822,7 @@ with gr.Blocks(title="Agent Feynman") as demo:
|
|
776 |
serpapi_key = gr.Textbox(label="SerpApi Key", type="password", placeholder="Enter your SerpApi key")
|
777 |
num_slides = gr.Slider(1, 20, step=1, label="Number of Content Slides", value=3)
|
778 |
speaker_audio = gr.Audio(label="Speaker sample audio (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
|
|
|
779 |
generate_btn = gr.Button("Generate Lecture")
|
780 |
with gr.Column(scale=2):
|
781 |
default_slide_html = """
|
@@ -788,7 +835,8 @@ with gr.Blocks(title="Agent Feynman") as demo:
|
|
788 |
|
789 |
speaker_audio.change(
|
790 |
fn=update_audio_preview,
|
791 |
-
inputs=speaker_audio
|
|
|
792 |
)
|
793 |
|
794 |
generate_btn.click(
|
|
|
5 |
import asyncio
|
6 |
import logging
|
7 |
import subprocess
|
8 |
+
import shutil
|
9 |
from serpapi import GoogleSearch
|
10 |
from pydantic import BaseModel
|
11 |
from autogen_agentchat.agents import AssistantAgent
|
|
|
21 |
import soundfile as sf
|
22 |
import tempfile
|
23 |
from pydub import AudioSegment
|
24 |
+
|
25 |
# Set up logging
|
26 |
logging.basicConfig(
|
27 |
level=logging.DEBUG,
|
|
|
173 |
logger.error("Failed to validate or convert speaker audio %s: %s", speaker_audio, str(e))
|
174 |
return None
|
175 |
|
176 |
+
# Helper function to download XTTS-v2 model
|
177 |
+
def download_xtts_model():
    """Make sure the XTTS-v2 model directory is populated, downloading if needed.

    The download is delegated to the ``huggingface-cli`` executable, which is
    asked to place the ``coqui/XTTS-v2`` model repository into
    ``XTTS_MODEL_DIR``.

    Returns:
        bool: True when ``XTTS_MODEL_DIR`` already holds at least one entry or
        the download command exits successfully; False when ``huggingface-cli``
        is not on PATH or the command fails for any reason. Errors are logged,
        never raised.
    """
    # A bare existence check would accept an empty directory (e.g. one left
    # behind by an interrupted download) or a stray file at this path, and the
    # download would then be skipped forever. Only skip when the path is a
    # directory that can be listed and actually contains something.
    try:
        already_populated = bool(os.listdir(XTTS_MODEL_DIR))
    except OSError:
        # Missing path, not a directory, or unreadable: fall through and let
        # the download attempt (or its failure) decide the outcome.
        already_populated = False

    if already_populated:
        logger.info("XTTS-v2 directory already exists: %s", XTTS_MODEL_DIR)
        return True

    if not shutil.which("huggingface-cli"):
        logger.error("huggingface-cli not installed")
        return False

    cmd = [
        "huggingface-cli",
        "download",
        "coqui/XTTS-v2",
        "--repo-type", "model",
        "--local-dir", XTTS_MODEL_DIR,
    ]
    logger.info("Downloading XTTS-v2 model: %s", " ".join(cmd))

    try:
        # Argument list with the default shell=False: nothing is interpreted
        # by a shell. check=True turns a non-zero exit into CalledProcessError.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        logger.error("Failed to download XTTS-v2: %s\n%s", e.stderr, e.stdout)
        return False
    except Exception as e:
        # Top-level boundary for this helper: callers only expect a bool.
        logger.error("Unexpected error downloading XTTS-v2: %s", str(e))
        return False
    else:
        logger.info("XTTS-v2 download succeeded: %s", result.stdout)
        return True
|
210 |
+
|
211 |
# Helper function to generate audio using XTTS-v2 CLI
|
212 |
def generate_xtts_audio(text, speaker_wav, output_path):
|
213 |
try:
|
|
|
614 |
"""
|
615 |
return
|
616 |
|
617 |
+
# Download XTTS-v2 model if needed
|
618 |
+
if not download_xtts_model():
|
619 |
+
yield f"""
|
620 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
621 |
+
<h2 style="color: #d9534f;">Failed to download XTTS-v2 model</h2>
|
622 |
+
<p style="margin-top: 20px;">Please install huggingface_hub (`pip install huggingface_hub`) and ensure internet connectivity, then try again.</p>
|
623 |
+
</div>
|
624 |
+
"""
|
625 |
+
return
|
626 |
+
|
627 |
# Verify XTTS-v2 model files
|
628 |
required_files = [
|
629 |
os.path.join(XTTS_MODEL_DIR, "model_se.pth.tar"),
|
|
|
635 |
yield f"""
|
636 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
637 |
<h2 style="color: #d9534f;">Missing XTTS-v2 model files</h2>
|
638 |
+
<p style="margin-top: 20px;">Failed to download XTTS-v2 to {XTTS_MODEL_DIR}. Please run `huggingface-cli download coqui/XTTS-v2 --repo-type model --local-dir XTTS-v2` manually and try again.</p>
|
639 |
</div>
|
640 |
"""
|
641 |
return
|
|
|
822 |
serpapi_key = gr.Textbox(label="SerpApi Key", type="password", placeholder="Enter your SerpApi key")
|
823 |
num_slides = gr.Slider(1, 20, step=1, label="Number of Content Slides", value=3)
|
824 |
speaker_audio = gr.Audio(label="Speaker sample audio (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
|
825 |
+
audio_preview = gr.Audio(label="Audio Preview", interactive=False)
|
826 |
generate_btn = gr.Button("Generate Lecture")
|
827 |
with gr.Column(scale=2):
|
828 |
default_slide_html = """
|
|
|
835 |
|
836 |
speaker_audio.change(
|
837 |
fn=update_audio_preview,
|
838 |
+
inputs=speaker_audio,
|
839 |
+
outputs=audio_preview
|
840 |
)
|
841 |
|
842 |
generate_btn.click(
|