Update app.py
app.py CHANGED
@@ -4,7 +4,7 @@ import re
 import gradio as gr
 import asyncio
 import logging
-import
+import torch
 from serpapi import GoogleSearch
 from pydantic import BaseModel
 from autogen_agentchat.agents import AssistantAgent
@@ -20,6 +20,7 @@ import traceback
 import soundfile as sf
 import tempfile
 from pydub import AudioSegment
+from TTS.api import TTS

 # Set up logging
 logging.basicConfig(
@@ -35,7 +36,16 @@ logger = logging.getLogger(__name__)
 # Set up environment
 OUTPUT_DIR = "outputs"
 os.makedirs(OUTPUT_DIR, exist_ok=True)
-
+os.environ["COQUI_TOS_AGREED"] = "1"
+
+# Initialize TTS model
+device = "cuda" if torch.cuda.is_available() else "cpu"
+try:
+    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+    logger.info("TTS model initialized on %s", device)
+except Exception as e:
+    logger.error("Failed to initialize TTS model: %s", str(e))
+    tts = None

 # Define Pydantic model for slide data
 class Slide(BaseModel):
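The change above replaces the external tts CLI invocation with an in-process Coqui XTTS-v2 model that is loaded once at startup and reused for every slide. As a minimal standalone sketch of that path (speaker.wav and out.wav are placeholder paths for illustration, not files in this Space):

    import os
    import torch
    from TTS.api import TTS

    os.environ["COQUI_TOS_AGREED"] = "1"  # accept the Coqui model terms non-interactively

    # Load XTTS-v2 once; GPU if available, otherwise CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

    # Voice-cloned synthesis from a short reference clip (placeholder paths)
    tts.tts_to_file(
        text="Hello from the slide narrator.",
        speaker_wav="speaker.wav",
        language="en",
        file_path="out.wav",
    )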
@@ -172,34 +182,17 @@ async def validate_and_convert_speaker_audio(speaker_audio):
         logger.error("Failed to validate or convert speaker audio %s: %s", speaker_audio, str(e))
         return None

-# Helper function to generate audio using
+# Helper function to generate audio using Coqui TTS API
 def generate_xtts_audio(text, speaker_wav, output_path):
+    if not tts:
+        logger.error("TTS model not initialized")
+        return False
     try:
-
-
-            "--model_name", "tts_models/multilingual/multi-dataset/xtts_v2",
-            "--encoder_path", "model_se.pth.tar",
-            "--encoder_config", "config_se.json",
-            "--speaker_wav", speaker_wav,
-            "--text", text,
-            "--out_path", output_path,
-            "--language_idx", "en"
-        ]
-        logger.debug("Executing tts command: %s", " ".join(cmd))
-        result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            input="y\n",  # Automatically provide 'y' to any download prompt
-            check=True
-        )
-        logger.info("tts command succeeded for %s: %s", output_path, result.stdout)
+        tts.tts_to_file(text=text, speaker_wav=speaker_wav, language="en", file_path=output_path)
+        logger.info("Generated audio for %s", output_path)
         return True
-    except subprocess.CalledProcessError as e:
-        logger.error("tts command failed for %s: %s\n%s", output_path, e.stderr, e.stdout)
-        return False
     except Exception as e:
-        logger.error("
+        logger.error("Failed to generate audio for %s: %s", output_path, str(e))
         return False

 # Helper function to extract JSON from messages
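generate_xtts_audio still returns a boolean rather than raising, so callers decide how to react; per the comment later in the diff, on_generate processes audio generation sequentially with retries. A hypothetical wrapper illustrating that pattern (generate_with_retries and max_attempts are illustrative names, not part of app.py):

    def generate_with_retries(text, speaker_wav, output_path, max_attempts=3):
        # Retry the boolean-returning helper a few times before giving up
        for attempt in range(1, max_attempts + 1):
            if generate_xtts_audio(text, speaker_wav, output_path):
                return True
            logger.warning("TTS attempt %d/%d failed for %s", attempt, max_attempts, output_path)
        return False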
@@ -288,9 +281,19 @@ def extract_json_from_message(message):
 # Function to generate Markdown and convert to PDF (landscape, centered)
 def generate_slides_pdf(slides):
     pdf = MarkdownPdf()
+    # Add LaTeX preamble for landscape orientation
+    preamble = r"""
+    \usepackage{pdflscape}
+    \newcommand{\blandscape}{\begin{landscape}}
+    \newcommand{\elandscape}{\end{landscape}}
+    """
+    pdf.set_preamble(preamble)
+
     for slide in slides:
         content_lines = slide['content'].replace('\n', '\n\n')
         markdown_content = f"""
+\\blandscape
+
 <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; text-align: center; padding: 20px;">
 # {slide['title']}

@@ -300,6 +303,8 @@ def generate_slides_pdf(slides):
 {content_lines}
 </div>

+\\elandscape
+
 ---
 """
         pdf.add_section(Section(markdown_content, toc=False))
@@ -328,6 +333,15 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
         """
         return

+    if not tts:
+        yield f"""
+        <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
+        <h2 style="color: #d9534f;">TTS model not initialized</h2>
+        <p style="margin-top: 20px;">Please ensure the Coqui TTS model is properly installed and try again.</p>
+        </div>
+        """
+        return
+
     model_client = get_model_client(api_service, api_key)

     research_agent = AssistantAgent(
@@ -564,8 +578,7 @@ Example for 1 content slide:

     # Generate PDF from slides
     pdf_file = generate_slides_pdf(slides)
-    pdf_path = os.path.
-    print(f"PDF file generated: {pdf_file}")
+    pdf_path = f"file://{os.path.abspath(pdf_file)}"

     audio_files = []
     speaker_audio = speaker_audio if speaker_audio else "feynman.mp3"
@@ -580,22 +593,6 @@ Example for 1 content slide:
         """
         return

-    # Verify XTTS-v2 model files
-    # required_files = [
-    #     os.path.join(XTTS_MODEL_DIR, "model_se.pth.tar"),
-    #     os.path.join(XTTS_MODEL_DIR, "config_se.json")
-    # ]
-    # for f in required_files:
-    #     if not os.path.exists(f):
-    #         logger.error("Missing XTTS-v2 model file: %s", f)
-    #         yield f"""
-    #         <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
-    #         <h2 style="color: #d9534f;">Missing XTTS-v2 model files</h2>
-    #         <p style="margin-top: 20px;">Please ensure XTTS-v2 is downloaded to {XTTS_MODEL_DIR} and try again.</p>
-    #         </div>
-    #         """
-    #         return
-
     # Process audio generation sequentially with retries
     for i, script in enumerate(scripts):
         cleaned_script = clean_script_text(script)
@@ -627,7 +624,7 @@ Example for 1 content slide:

         success = generate_xtts_audio(current_text, validated_speaker_wav, audio_file)
         if not success:
-            raise RuntimeError("
+            raise RuntimeError("TTS generation failed")

         logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
         audio_files.append(audio_file)
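The per-slide clips collected in audio_files can later be stitched together with pydub, which app.py already imports; a rough sketch of such a combination step (combine_audio and combined.mp3 are placeholder names, and the actual assembly logic is outside this diff):

    from pydub import AudioSegment

    def combine_audio(audio_files, out_path="combined.mp3"):
        # Concatenate the per-slide clips in order and export a single track
        combined = AudioSegment.empty()
        for path in audio_files:
            combined += AudioSegment.from_file(path)
        combined.export(out_path, format="mp3")
        return out_path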