Jaward committed on
Commit
3d31350
·
verified ·
1 Parent(s): f0c7d2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -45
app.py CHANGED
@@ -4,7 +4,7 @@ import re
4
  import gradio as gr
5
  import asyncio
6
  import logging
7
- import subprocess
8
  from serpapi import GoogleSearch
9
  from pydantic import BaseModel
10
  from autogen_agentchat.agents import AssistantAgent
@@ -20,6 +20,7 @@ import traceback
20
  import soundfile as sf
21
  import tempfile
22
  from pydub import AudioSegment
 
23
 
24
  # Set up logging
25
  logging.basicConfig(
@@ -35,7 +36,16 @@ logger = logging.getLogger(__name__)
35
  # Set up environment
36
  OUTPUT_DIR = "outputs"
37
  os.makedirs(OUTPUT_DIR, exist_ok=True)
38
- XTTS_MODEL_DIR = "XTTS-v2"
 
 
 
 
 
 
 
 
 
39
 
40
  # Define Pydantic model for slide data
41
  class Slide(BaseModel):
@@ -172,34 +182,17 @@ async def validate_and_convert_speaker_audio(speaker_audio):
172
  logger.error("Failed to validate or convert speaker audio %s: %s", speaker_audio, str(e))
173
  return None
174
 
175
- # Helper function to generate audio using XTTS-v2 CLI
176
  def generate_xtts_audio(text, speaker_wav, output_path):
 
 
 
177
  try:
178
- cmd = [
179
- "tts",
180
- "--model_name", "tts_models/multilingual/multi-dataset/xtts_v2",
181
- "--encoder_path", "model_se.pth.tar",
182
- "--encoder_config", "config_se.json",
183
- "--speaker_wav", speaker_wav,
184
- "--text", text,
185
- "--out_path", output_path,
186
- "--language_idx", "en"
187
- ]
188
- logger.debug("Executing tts command: %s", " ".join(cmd))
189
- result = subprocess.run(
190
- cmd,
191
- capture_output=True,
192
- text=True,
193
- input="y\n", # Automatically provide 'y' to any download prompt
194
- check=True
195
- )
196
- logger.info("tts command succeeded for %s: %s", output_path, result.stdout)
197
  return True
198
- except subprocess.CalledProcessError as e:
199
- logger.error("tts command failed for %s: %s\n%s", output_path, e.stderr, e.stdout)
200
- return False
201
  except Exception as e:
202
- logger.error("Unexpected error running tts for %s: %s", output_path, str(e))
203
  return False
204
 
205
  # Helper function to extract JSON from messages
@@ -288,9 +281,19 @@ def extract_json_from_message(message):
288
  # Function to generate Markdown and convert to PDF (landscape, centered)
289
  def generate_slides_pdf(slides):
290
  pdf = MarkdownPdf()
 
 
 
 
 
 
 
 
291
  for slide in slides:
292
  content_lines = slide['content'].replace('\n', '\n\n')
293
  markdown_content = f"""
 
 
294
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; text-align: center; padding: 20px;">
295
  # {slide['title']}
296
 
@@ -300,6 +303,8 @@ def generate_slides_pdf(slides):
300
  {content_lines}
301
  </div>
302
 
 
 
303
  ---
304
  """
305
  pdf.add_section(Section(markdown_content, toc=False))
@@ -328,6 +333,15 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
328
  """
329
  return
330
 
 
 
 
 
 
 
 
 
 
331
  model_client = get_model_client(api_service, api_key)
332
 
333
  research_agent = AssistantAgent(
@@ -564,8 +578,7 @@ Example for 1 content slide:
564
 
565
  # Generate PDF from slides
566
  pdf_file = generate_slides_pdf(slides)
567
- pdf_path = os.path.join(OUTPUT_DIR, pdf_file)
568
- print(f"PDF file generated: {pdf_file}")
569
 
570
  audio_files = []
571
  speaker_audio = speaker_audio if speaker_audio else "feynman.mp3"
@@ -580,22 +593,6 @@ Example for 1 content slide:
580
  """
581
  return
582
 
583
- # Verify XTTS-v2 model files
584
- # required_files = [
585
- # os.path.join(XTTS_MODEL_DIR, "model_se.pth.tar"),
586
- # os.path.join(XTTS_MODEL_DIR, "config_se.json")
587
- # ]
588
- # for f in required_files:
589
- # if not os.path.exists(f):
590
- # logger.error("Missing XTTS-v2 model file: %s", f)
591
- # yield f"""
592
- # <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
593
- # <h2 style="color: #d9534f;">Missing XTTS-v2 model files</h2>
594
- # <p style="margin-top: 20px;">Please ensure XTTS-v2 is downloaded to {XTTS_MODEL_DIR} and try again.</p>
595
- # </div>
596
- # """
597
- # return
598
-
599
  # Process audio generation sequentially with retries
600
  for i, script in enumerate(scripts):
601
  cleaned_script = clean_script_text(script)
@@ -627,7 +624,7 @@ Example for 1 content slide:
627
 
628
  success = generate_xtts_audio(current_text, validated_speaker_wav, audio_file)
629
  if not success:
630
- raise RuntimeError("tts command failed")
631
 
632
  logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
633
  audio_files.append(audio_file)
 
4
  import gradio as gr
5
  import asyncio
6
  import logging
7
+ import torch
8
  from serpapi import GoogleSearch
9
  from pydantic import BaseModel
10
  from autogen_agentchat.agents import AssistantAgent
 
20
  import soundfile as sf
21
  import tempfile
22
  from pydub import AudioSegment
23
+ from TTS.api import TTS
24
 
25
  # Set up logging
26
  logging.basicConfig(
 
36
  # Set up environment
37
  OUTPUT_DIR = "outputs"
38
  os.makedirs(OUTPUT_DIR, exist_ok=True)
39
+ os.environ["COQUI_TOS_AGREED"] = "1"
40
+
41
+ # Initialize TTS model
42
+ device = "cuda" if torch.cuda.is_available() else "cpu"
43
+ try:
44
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
45
+ logger.info("TTS model initialized on %s", device)
46
+ except Exception as e:
47
+ logger.error("Failed to initialize TTS model: %s", str(e))
48
+ tts = None
49
 
50
  # Define Pydantic model for slide data
51
  class Slide(BaseModel):
 
182
  logger.error("Failed to validate or convert speaker audio %s: %s", speaker_audio, str(e))
183
  return None
184
 
185
+ # Helper function to generate audio using Coqui TTS API
186
  def generate_xtts_audio(text, speaker_wav, output_path):
187
+ if not tts:
188
+ logger.error("TTS model not initialized")
189
+ return False
190
  try:
191
+ tts.tts_to_file(text=text, speaker_wav=speaker_wav, language="en", file_path=output_path)
192
+ logger.info("Generated audio for %s", output_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  return True
 
 
 
194
  except Exception as e:
195
+ logger.error("Failed to generate audio for %s: %s", output_path, str(e))
196
  return False
197
 
198
  # Helper function to extract JSON from messages
 
281
  # Function to generate Markdown and convert to PDF (landscape, centered)
282
  def generate_slides_pdf(slides):
283
  pdf = MarkdownPdf()
284
+ # Add LaTeX preamble for landscape orientation
285
+ preamble = r"""
286
+ \usepackage{pdflscape}
287
+ \newcommand{\blandscape}{\begin{landscape}}
288
+ \newcommand{\elandscape}{\end{landscape}}
289
+ """
290
+ pdf.set_preamble(preamble)
291
+
292
  for slide in slides:
293
  content_lines = slide['content'].replace('\n', '\n\n')
294
  markdown_content = f"""
295
+ \\blandscape
296
+
297
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; text-align: center; padding: 20px;">
298
  # {slide['title']}
299
 
 
303
  {content_lines}
304
  </div>
305
 
306
+ \\elandscape
307
+
308
  ---
309
  """
310
  pdf.add_section(Section(markdown_content, toc=False))
 
333
  """
334
  return
335
 
336
+ if not tts:
337
+ yield f"""
338
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
339
+ <h2 style="color: #d9534f;">TTS model not initialized</h2>
340
+ <p style="margin-top: 20px;">Please ensure the Coqui TTS model is properly installed and try again.</p>
341
+ </div>
342
+ """
343
+ return
344
+
345
  model_client = get_model_client(api_service, api_key)
346
 
347
  research_agent = AssistantAgent(
 
578
 
579
  # Generate PDF from slides
580
  pdf_file = generate_slides_pdf(slides)
581
+ pdf_path = f"file://{os.path.abspath(pdf_file)}"
 
582
 
583
  audio_files = []
584
  speaker_audio = speaker_audio if speaker_audio else "feynman.mp3"
 
593
  """
594
  return
595
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
596
  # Process audio generation sequentially with retries
597
  for i, script in enumerate(scripts):
598
  cleaned_script = clean_script_text(script)
 
624
 
625
  success = generate_xtts_audio(current_text, validated_speaker_wav, audio_file)
626
  if not success:
627
+ raise RuntimeError("TTS generation failed")
628
 
629
  logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
630
  audio_files.append(audio_file)