Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import json
|
3 |
import re
|
@@ -6,8 +10,6 @@ import asyncio
|
|
6 |
import logging
|
7 |
import torch
|
8 |
import random
|
9 |
-
import tempfile
|
10 |
-
import zipfile
|
11 |
from serpapi import GoogleSearch
|
12 |
from pydantic import BaseModel
|
13 |
from autogen_agentchat.agents import AssistantAgent
|
@@ -21,10 +23,9 @@ from autogen_ext.models.ollama import OllamaChatCompletionClient
|
|
21 |
from markdown_pdf import MarkdownPdf, Section
|
22 |
import traceback
|
23 |
import soundfile as sf
|
24 |
-
import
|
25 |
from pydub import AudioSegment
|
26 |
from TTS.api import TTS
|
27 |
-
from gradio_pdf import PDF
|
28 |
|
29 |
# Set up logging
|
30 |
logging.basicConfig(
|
@@ -37,8 +38,11 @@ logging.basicConfig(
|
|
37 |
)
|
38 |
logger = logging.getLogger(__name__)
|
39 |
|
40 |
-
# Set up environment
|
|
|
|
|
41 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
|
|
42 |
|
43 |
# Define Pydantic model for slide data
|
44 |
class Slide(BaseModel):
|
@@ -83,6 +87,17 @@ def search_web(query: str, serpapi_key: str) -> str:
|
|
83 |
logger.error("Unexpected error during search: %s", str(e))
|
84 |
return f"Unexpected error during search: {str(e)}"
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
# Function to get model client based on selected service
|
87 |
def get_model_client(service, api_key):
|
88 |
if service == "OpenAI-gpt-4o-2024-08-06":
|
@@ -129,7 +144,7 @@ def clean_script_text(script):
|
|
129 |
return script
|
130 |
|
131 |
# Helper function to validate and convert speaker audio (MP3 or WAV)
|
132 |
-
async def validate_and_convert_speaker_audio(speaker_audio
|
133 |
if not os.path.exists(speaker_audio):
|
134 |
logger.error("Speaker audio file does not exist: %s", speaker_audio)
|
135 |
return None
|
@@ -142,8 +157,9 @@ async def validate_and_convert_speaker_audio(speaker_audio, temp_dir):
|
|
142 |
audio = AudioSegment.from_mp3(speaker_audio)
|
143 |
# Convert to mono, 22050 Hz
|
144 |
audio = audio.set_channels(1).set_frame_rate(22050)
|
145 |
-
|
146 |
-
|
|
|
147 |
elif ext == ".wav":
|
148 |
speaker_wav = speaker_audio
|
149 |
else:
|
@@ -161,9 +177,9 @@ async def validate_and_convert_speaker_audio(speaker_audio, temp_dir):
|
|
161 |
if data.ndim == 2:
|
162 |
logger.info("Converting stereo WAV to mono: %s", speaker_wav)
|
163 |
data = data.mean(axis=1)
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
|
168 |
logger.info("Validated speaker audio: %s", speaker_wav)
|
169 |
return speaker_wav
|
@@ -198,7 +214,9 @@ def extract_json_from_message(message):
|
|
198 |
match = re.search(pattern, content, re.DOTALL)
|
199 |
if match:
|
200 |
try:
|
201 |
-
|
|
|
|
|
202 |
except json.JSONDecodeError as e:
|
203 |
logger.error("Failed to parse JSON from TextMessage: %s, Content: %s", e, content)
|
204 |
# Fallback: Try raw JSON array
|
@@ -206,7 +224,9 @@ def extract_json_from_message(message):
|
|
206 |
match = re.search(json_pattern, content, re.DOTALL)
|
207 |
if match:
|
208 |
try:
|
209 |
-
|
|
|
|
|
210 |
except json.JSONDecodeError as e:
|
211 |
logger.error("Failed to parse fallback JSON from TextMessage: %s, Content: %s", e, content)
|
212 |
# Fallback: Try any JSON-like structure
|
@@ -243,14 +263,18 @@ def extract_json_from_message(message):
|
|
243 |
match = re.search(pattern, content, re.DOTALL)
|
244 |
if match:
|
245 |
try:
|
246 |
-
|
|
|
|
|
247 |
except json.JSONDecodeError as e:
|
248 |
logger.error("Failed to parse JSON from HandoffMessage context: %s, Content: %s", e, content)
|
249 |
json_pattern = r"\[\s*\{.*?\}\s*\]"
|
250 |
match = re.search(json_pattern, content, re.DOTALL)
|
251 |
if match:
|
252 |
try:
|
253 |
-
|
|
|
|
|
254 |
except json.JSONDecodeError as e:
|
255 |
logger.error("Failed to parse fallback JSON from HandoffMessage context: %s, Content: %s", e, content)
|
256 |
try:
|
@@ -269,7 +293,7 @@ def extract_json_from_message(message):
|
|
269 |
return None
|
270 |
|
271 |
# Function to generate Markdown and convert to PDF (portrait, centered)
|
272 |
-
def generate_slides_pdf(slides
|
273 |
pdf = MarkdownPdf()
|
274 |
|
275 |
for slide in slides:
|
@@ -288,48 +312,14 @@ def generate_slides_pdf(slides, temp_dir):
|
|
288 |
"""
|
289 |
pdf.add_section(Section(markdown_content, toc=False))
|
290 |
|
291 |
-
pdf_file = os.path.join(
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
zip_path = os.path.join(temp_dir, "lecture_outputs.zip")
|
300 |
-
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
|
301 |
-
# Add slides PDF
|
302 |
-
pdf_file = os.path.join(temp_dir, "slides.pdf")
|
303 |
-
if os.path.exists(pdf_file):
|
304 |
-
zipf.write(pdf_file, "slides.pdf")
|
305 |
-
|
306 |
-
# Add audio files
|
307 |
-
for i, audio_file in enumerate(audio_files):
|
308 |
-
if audio_file and os.path.exists(audio_file):
|
309 |
-
zipf.write(audio_file, f"slide_{i+1}.wav")
|
310 |
-
|
311 |
-
# Add raw and cleaned scripts
|
312 |
-
for i in range(len(slides)):
|
313 |
-
raw_script_file = os.path.join(temp_dir, f"slide_{i+1}_raw_script.txt")
|
314 |
-
cleaned_script_file = os.path.join(temp_dir, f"slide_{i+1}_script.txt")
|
315 |
-
if os.path.exists(raw_script_file):
|
316 |
-
zipf.write(raw_script_file, f"slide_{i+1}_raw_script.txt")
|
317 |
-
if os.path.exists(cleaned_script_file):
|
318 |
-
zipf.write(cleaned_script_file, f"slide_{i+1}_script.txt")
|
319 |
-
|
320 |
-
logger.info("Created ZIP file: %s", zip_path)
|
321 |
-
return zip_path
|
322 |
-
|
323 |
-
# Helper function for progress HTML
|
324 |
-
def html_with_progress(label, progress):
|
325 |
-
return f"""
|
326 |
-
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
327 |
-
<div style="width: 100%; background-color: #FFFFFF; border-radius: 10px; overflow: hidden; margin-bottom: 20px;">
|
328 |
-
<div style="width: {progress}%; height: 30px; background-color: #4CAF50; border-radius: 10px;"></div>
|
329 |
-
</div>
|
330 |
-
<h2 style="font-style: italic; color: #555;">{label}</h2>
|
331 |
-
</div>
|
332 |
-
"""
|
333 |
|
334 |
# Async function to update audio preview
|
335 |
async def update_audio_preview(audio_file):
|
@@ -341,37 +331,50 @@ async def update_audio_preview(audio_file):
|
|
341 |
# Async function to generate lecture materials and audio
|
342 |
async def on_generate(api_service, api_key, serpapi_key, title, topic, instructions, lecture_type, speaker_audio, num_slides):
|
343 |
if not serpapi_key:
|
344 |
-
yield
|
|
|
|
|
|
|
|
|
|
|
345 |
return
|
346 |
|
347 |
-
#
|
348 |
-
|
349 |
-
|
350 |
-
tts = None
|
351 |
-
try:
|
352 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
353 |
-
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
354 |
-
logger.info("TTS model initialized on %s", device)
|
355 |
-
except Exception as e:
|
356 |
-
logger.error("Failed to initialize TTS model: %s", str(e))
|
357 |
-
yield html_with_progress(f"TTS model initialization failed: {str(e)}", 0)
|
358 |
-
return
|
359 |
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
Example for 2 content slides:
|
376 |
```json
|
377 |
[
|
@@ -382,14 +385,14 @@ Example for 2 content slides:
|
|
382 |
{{"title": "Thank You", "content": "Thank you message"}}
|
383 |
]
|
384 |
```""",
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {num_slides + 3} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
|
394 |
Example for 1 content slide:
|
395 |
```json
|
@@ -400,373 +403,412 @@ Example for 1 content slide:
|
|
400 |
"Thanks for, um, attending today!"
|
401 |
]
|
402 |
```""",
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that
|
412 |
Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture is coherent. TERMINATE'
|
413 |
""")
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
437 |
max_slide_retries = 2
|
438 |
slide_retry_count = 0
|
439 |
|
440 |
while slide_retry_count <= max_slide_retries:
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
|
|
|
|
|
|
448 |
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
if isinstance(message, HandoffMessage):
|
454 |
-
logger.info("Handoff from %s to %s, Context: %s", source, message.target, message.context)
|
455 |
-
if source == "research_agent" and message.target == "slide_agent":
|
456 |
-
progress = 25
|
457 |
-
label = "Slides: generating..."
|
458 |
-
yield html_with_progress(label, progress)
|
459 |
-
await asyncio.sleep(0.1)
|
460 |
-
elif source == "slide_agent" and message.target == "script_agent":
|
461 |
-
if slides is None:
|
462 |
-
logger.warning("Slide Agent handoff without slides JSON")
|
463 |
-
extracted_json = extract_json_from_message(message)
|
464 |
-
if extracted_json:
|
465 |
-
slides = extracted_json
|
466 |
-
logger.info("Extracted slides JSON from HandoffMessage context: %s", slides)
|
467 |
-
if slides is None:
|
468 |
-
label = "Slides: failed to generate..."
|
469 |
-
yield html_with_progress(label, progress)
|
470 |
-
await asyncio.sleep(0.1)
|
471 |
-
progress = 50
|
472 |
-
label = "Scripts: generating..."
|
473 |
-
yield html_with_progress(label, progress)
|
474 |
-
await asyncio.sleep(0.1)
|
475 |
-
elif source == "script_agent" and message.target == "feynman_agent":
|
476 |
-
if scripts is None:
|
477 |
-
logger.warning("Script Agent handoff without scripts JSON")
|
478 |
-
extracted_json = extract_json_from_message(message)
|
479 |
-
if extracted_json:
|
480 |
-
scripts = extracted_json
|
481 |
-
logger.info("Extracted scripts JSON from HandoffMessage context: %s", scripts)
|
482 |
-
progress = 75
|
483 |
-
label = "Review: in progress..."
|
484 |
-
yield html_with_progress(label, progress)
|
485 |
-
await asyncio.sleep(0.1)
|
486 |
-
|
487 |
-
elif source == "research_agent" and isinstance(message, TextMessage) and "handoff_to_slide_agent" in message.content:
|
488 |
-
logger.info("Research Agent completed research")
|
489 |
progress = 25
|
490 |
label = "Slides: generating..."
|
491 |
yield html_with_progress(label, progress)
|
492 |
await asyncio.sleep(0.1)
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
logger.warning("Generated %d slides, expected %d. Retrying...", len(slides), expected_slide_count)
|
503 |
-
slide_retry_count += 1
|
504 |
-
if slide_retry_count <= max_slide_retries:
|
505 |
-
# Re-prompt slide agent
|
506 |
-
retry_message = TextMessage(
|
507 |
-
content=f"Please generate EXACTLY {num_slides} content slides plus 1 quiz, 1 assignment, and 1 thank-you slide (total {num_slides + 3}).",
|
508 |
-
source="user",
|
509 |
-
recipient="slide_agent"
|
510 |
-
)
|
511 |
-
task_result.messages.append(retry_message)
|
512 |
-
slides = None
|
513 |
-
continue
|
514 |
-
else:
|
515 |
-
yield html_with_progress(f"Failed to generate correct number of slides after {max_slide_retries} retries. Expected {expected_slide_count}, got {len(slides)}.", progress)
|
516 |
-
return
|
517 |
-
# Save slide content to individual files
|
518 |
-
for i, slide in enumerate(slides):
|
519 |
-
content_file = os.path.join(temp_dir, f"slide_{i+1}_content.txt")
|
520 |
-
try:
|
521 |
-
with open(content_file, "w", encoding="utf-8") as f:
|
522 |
-
f.write(slide["content"])
|
523 |
-
logger.info("Saved slide content to %s: %s", content_file, slide["content"])
|
524 |
-
except Exception as e:
|
525 |
-
logger.error("Error saving slide content to %s: %s", content_file, str(e))
|
526 |
-
progress = 50
|
527 |
-
label = "Scripts: generating..."
|
528 |
yield html_with_progress(label, progress)
|
529 |
await asyncio.sleep(0.1)
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
with open(script_file, "w", encoding="utf-8") as f:
|
544 |
-
f.write(script)
|
545 |
-
logger.info("Saved raw script to %s: %s", script_file, script)
|
546 |
-
except Exception as e:
|
547 |
-
logger.error("Error saving raw script to %s: %s", script_file, str(e))
|
548 |
-
progress = 75
|
549 |
-
label = "Scripts generated and saved. Reviewing..."
|
550 |
-
yield html_with_progress(label, progress)
|
551 |
-
await asyncio.sleep(0.1)
|
552 |
-
else:
|
553 |
-
logger.warning("No JSON extracted from script_agent message: %s", message.to_text())
|
554 |
-
if script_retry_count < max_script_retries:
|
555 |
-
script_retry_count += 1
|
556 |
-
logger.info("Retrying script generation (attempt %d/%d)", script_retry_count, max_script_retries)
|
557 |
-
# Re-prompt script agent
|
558 |
-
retry_message = TextMessage(
|
559 |
-
content="Please generate scripts for the slides as per your instructions.",
|
560 |
-
source="user",
|
561 |
-
recipient="script_agent"
|
562 |
-
)
|
563 |
-
task_result.messages.append(retry_message)
|
564 |
-
continue
|
565 |
-
|
566 |
-
elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
|
567 |
-
logger.info("Feynman Agent completed lecture review: %s", message.content)
|
568 |
-
progress = 90
|
569 |
-
label = "Lecture materials ready. Generating audio..."
|
570 |
yield html_with_progress(label, progress)
|
571 |
await asyncio.sleep(0.1)
|
572 |
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
577 |
-
|
578 |
-
|
579 |
-
yield html_with_progress(error_message, progress)
|
580 |
-
return
|
581 |
-
|
582 |
-
expected_slide_count = num_slides + 3
|
583 |
-
if len(slides) != expected_slide_count:
|
584 |
-
logger.error("Final validation failed: Expected %d slides, received %d", expected_slide_count, len(slides))
|
585 |
-
yield html_with_progress(f"Incorrect number of slides. Expected {expected_slide_count}, got {len(slides)}.", progress)
|
586 |
-
return
|
587 |
-
|
588 |
-
if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
|
589 |
-
logger.error("Scripts are not a list of strings: %s", scripts)
|
590 |
-
yield html_with_progress("Invalid script format. Scripts must be a list of strings.", progress)
|
591 |
-
return
|
592 |
|
593 |
-
|
594 |
-
logger.
|
595 |
-
|
596 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
597 |
|
598 |
-
|
599 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
600 |
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
607 |
return
|
608 |
-
|
609 |
-
|
610 |
-
|
611 |
-
|
612 |
-
|
613 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
614 |
|
615 |
-
|
616 |
-
|
617 |
-
|
618 |
-
f.write(cleaned_script or "")
|
619 |
-
logger.info("Saved cleaned script to %s: %s", script_file, cleaned_script)
|
620 |
-
except Exception as e:
|
621 |
-
logger.error("Error saving cleaned script to %s: %s", script_file, str(e))
|
622 |
|
623 |
-
|
624 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
625 |
audio_files.append(None)
|
626 |
progress = 90 + ((i + 1) / len(scripts)) * 10
|
627 |
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
628 |
yield html_with_progress(label, progress)
|
629 |
await asyncio.sleep(0.1)
|
630 |
-
|
631 |
-
|
632 |
-
|
633 |
-
|
634 |
-
|
635 |
-
|
636 |
-
|
637 |
-
|
638 |
-
|
639 |
-
|
640 |
-
|
641 |
-
|
642 |
-
|
643 |
-
|
644 |
-
|
645 |
-
|
646 |
-
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
|
651 |
-
|
652 |
-
|
653 |
-
|
654 |
-
|
655 |
-
|
656 |
-
logger.error("Max retries reached for slide %d, skipping", i + 1)
|
657 |
-
audio_files.append(None)
|
658 |
-
progress = 90 + ((i + 1) / len(scripts)) * 10
|
659 |
-
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
660 |
-
yield html_with_progress(label, progress)
|
661 |
-
await asyncio.sleep(0.1)
|
662 |
-
break
|
663 |
-
|
664 |
-
# Create ZIP file of all outputs
|
665 |
-
zip_path = create_outputs_zip(temp_dir, slides, audio_files, scripts)
|
666 |
-
|
667 |
-
# Prepare UI output
|
668 |
-
slides_info = json.dumps({"slides": [
|
669 |
-
{"title": slide["title"], "content": slide["content"]}
|
670 |
-
for slide in slides
|
671 |
-
], "audioFiles": audio_files})
|
672 |
-
|
673 |
-
html_output = f"""
|
674 |
-
<div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
|
675 |
-
<div id="slide-content" style="flex: 1; overflow: auto;">
|
676 |
-
<div id="pdf-viewer"></div>
|
677 |
-
</div>
|
678 |
-
<div style="padding: 20px;">
|
679 |
-
<div id="progress-bar" style="width: 100%; height: 5px; background-color: #ddd; border-radius: 2px; margin-bottom: 10px;">
|
680 |
-
<div id="progress-fill" style="width: {(1/len(slides)*100)}%; height: 100%; background-color: #4CAF50; border-radius: 2px;"></div>
|
681 |
-
</div>
|
682 |
-
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
683 |
-
<button onclick="prevSlide()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
|
684 |
-
<button onclick="togglePlay()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
|
685 |
-
<button onclick="nextSlide()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
|
686 |
-
</div>
|
687 |
-
<p id="slide-counter" style="text-align: center;">Slide 1 of {len(slides)}</p>
|
688 |
-
</div>
|
689 |
</div>
|
690 |
-
<
|
691 |
-
|
692 |
-
|
693 |
-
|
694 |
-
|
695 |
-
|
696 |
-
|
697 |
-
|
|
|
|
|
|
|
698 |
|
699 |
-
|
700 |
-
|
701 |
-
|
702 |
-
|
703 |
-
|
704 |
-
|
705 |
-
|
706 |
-
|
707 |
|
708 |
-
|
709 |
-
|
710 |
-
|
711 |
|
712 |
-
|
713 |
-
|
714 |
-
|
715 |
-
|
716 |
|
717 |
-
|
718 |
-
|
719 |
-
|
720 |
-
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
|
725 |
-
|
726 |
-
|
727 |
-
|
728 |
-
|
729 |
-
|
730 |
-
|
731 |
|
732 |
-
|
733 |
-
|
734 |
-
|
735 |
-
|
736 |
-
|
737 |
-
|
738 |
|
739 |
-
|
740 |
-
|
741 |
-
|
742 |
-
|
743 |
-
|
744 |
-
|
745 |
-
|
746 |
-
|
747 |
|
748 |
-
|
749 |
-
|
750 |
-
|
751 |
-
|
752 |
-
|
753 |
-
}}
|
754 |
-
}});
|
755 |
}}
|
756 |
}});
|
757 |
-
|
758 |
-
|
759 |
-
|
760 |
-
|
761 |
-
|
762 |
-
|
763 |
-
|
764 |
-
|
765 |
-
|
766 |
-
|
767 |
-
|
768 |
-
|
769 |
-
|
|
|
|
|
|
|
|
|
770 |
|
771 |
# Gradio interface
|
772 |
with gr.Blocks(title="Agent Feynman") as demo:
|
@@ -801,8 +843,6 @@ with gr.Blocks(title="Agent Feynman") as demo:
|
|
801 |
</div>
|
802 |
"""
|
803 |
slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html)
|
804 |
-
pdf_display = PDF(label="Lecture Slides PDF")
|
805 |
-
outputs_zip = gr.File(label="Download Outputs (PDF, Audio, Scripts)")
|
806 |
|
807 |
speaker_audio.change(
|
808 |
fn=update_audio_preview,
|
@@ -813,7 +853,7 @@ with gr.Blocks(title="Agent Feynman") as demo:
|
|
813 |
generate_btn.click(
|
814 |
fn=on_generate,
|
815 |
inputs=[api_service, api_key, serpapi_key, title, topic, instructions, lecture_type, speaker_audio, num_slides],
|
816 |
-
outputs=[slide_display
|
817 |
)
|
818 |
|
819 |
if __name__ == "__main__":
|
|
|
1 |
+
# Note: For Huggingface Spaces, ensure the Dockerfile includes:
|
2 |
+
# RUN mkdir -p /tmp/cache/
|
3 |
+
# RUN chmod a+rwx -R /tmp/cache/
|
4 |
+
# ENV TRANSFORMERS_CACHE=/tmp/cache/
|
5 |
import os
|
6 |
import json
|
7 |
import re
|
|
|
10 |
import logging
|
11 |
import torch
|
12 |
import random
|
|
|
|
|
13 |
from serpapi import GoogleSearch
|
14 |
from pydantic import BaseModel
|
15 |
from autogen_agentchat.agents import AssistantAgent
|
|
|
23 |
from markdown_pdf import MarkdownPdf, Section
|
24 |
import traceback
|
25 |
import soundfile as sf
|
26 |
+
import tempfile
|
27 |
from pydub import AudioSegment
|
28 |
from TTS.api import TTS
|
|
|
29 |
|
30 |
# Set up logging
|
31 |
logging.basicConfig(
|
|
|
38 |
)
|
39 |
logger = logging.getLogger(__name__)
|
40 |
|
41 |
+
# Set up environment for Huggingface Spaces
|
42 |
+
OUTPUT_DIR = "/data/outputs" # Persistent storage in Huggingface Spaces
|
43 |
+
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
44 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
45 |
+
gr.set_static_paths(paths=[OUTPUT_DIR]) # Expose OUTPUT_DIR for file access
|
46 |
|
47 |
# Define Pydantic model for slide data
|
48 |
class Slide(BaseModel):
|
|
|
87 |
logger.error("Unexpected error during search: %s", str(e))
|
88 |
return f"Unexpected error during search: {str(e)}"
|
89 |
|
90 |
+
# Define helper function for progress HTML
|
91 |
+
def html_with_progress(label, progress):
|
92 |
+
return f"""
|
93 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
94 |
+
<div style="width: 100%; background-color: #FFFFFF; border-radius: 10px; overflow: hidden; margin-bottom: 20px;">
|
95 |
+
<div style="width: {progress}%; height: 30px; background-color: #4CAF50; border-radius: 10px;"></div>
|
96 |
+
</div>
|
97 |
+
<h2 style="font-style: italic; color: #555;">{label}</h2>
|
98 |
+
</div>
|
99 |
+
"""
|
100 |
+
|
101 |
# Function to get model client based on selected service
|
102 |
def get_model_client(service, api_key):
|
103 |
if service == "OpenAI-gpt-4o-2024-08-06":
|
|
|
144 |
return script
|
145 |
|
146 |
# Helper function to validate and convert speaker audio (MP3 or WAV)
|
147 |
+
async def validate_and_convert_speaker_audio(speaker_audio):
|
148 |
if not os.path.exists(speaker_audio):
|
149 |
logger.error("Speaker audio file does not exist: %s", speaker_audio)
|
150 |
return None
|
|
|
157 |
audio = AudioSegment.from_mp3(speaker_audio)
|
158 |
# Convert to mono, 22050 Hz
|
159 |
audio = audio.set_channels(1).set_frame_rate(22050)
|
160 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
|
161 |
+
audio.export(temp_file.name, format="wav")
|
162 |
+
speaker_wav = temp_file.name
|
163 |
elif ext == ".wav":
|
164 |
speaker_wav = speaker_audio
|
165 |
else:
|
|
|
177 |
if data.ndim == 2:
|
178 |
logger.info("Converting stereo WAV to mono: %s", speaker_wav)
|
179 |
data = data.mean(axis=1)
|
180 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
|
181 |
+
sf.write(temp_file.name, data, samplerate)
|
182 |
+
speaker_wav = temp_file.name
|
183 |
|
184 |
logger.info("Validated speaker audio: %s", speaker_wav)
|
185 |
return speaker_wav
|
|
|
214 |
match = re.search(pattern, content, re.DOTALL)
|
215 |
if match:
|
216 |
try:
|
217 |
+
parsed = json.loads(match.group(1))
|
218 |
+
logger.info("Parsed JSON from TextMessage: %s", parsed)
|
219 |
+
return parsed
|
220 |
except json.JSONDecodeError as e:
|
221 |
logger.error("Failed to parse JSON from TextMessage: %s, Content: %s", e, content)
|
222 |
# Fallback: Try raw JSON array
|
|
|
224 |
match = re.search(json_pattern, content, re.DOTALL)
|
225 |
if match:
|
226 |
try:
|
227 |
+
parsed = json.loads(match.group(0))
|
228 |
+
logger.info("Parsed fallback JSON from TextMessage: %s", parsed)
|
229 |
+
return parsed
|
230 |
except json.JSONDecodeError as e:
|
231 |
logger.error("Failed to parse fallback JSON from TextMessage: %s, Content: %s", e, content)
|
232 |
# Fallback: Try any JSON-like structure
|
|
|
263 |
match = re.search(pattern, content, re.DOTALL)
|
264 |
if match:
|
265 |
try:
|
266 |
+
parsed = json.loads(match.group(1))
|
267 |
+
logger.info("Parsed JSON from HandoffMessage context: %s", parsed)
|
268 |
+
return parsed
|
269 |
except json.JSONDecodeError as e:
|
270 |
logger.error("Failed to parse JSON from HandoffMessage context: %s, Content: %s", e, content)
|
271 |
json_pattern = r"\[\s*\{.*?\}\s*\]"
|
272 |
match = re.search(json_pattern, content, re.DOTALL)
|
273 |
if match:
|
274 |
try:
|
275 |
+
parsed = json.loads(match.group(0))
|
276 |
+
logger.info("Parsed fallback JSON from HandoffMessage context: %s", parsed)
|
277 |
+
return parsed
|
278 |
except json.JSONDecodeError as e:
|
279 |
logger.error("Failed to parse fallback JSON from HandoffMessage context: %s, Content: %s", e, content)
|
280 |
try:
|
|
|
293 |
return None
|
294 |
|
295 |
# Function to generate Markdown and convert to PDF (portrait, centered)
|
296 |
+
def generate_slides_pdf(slides):
|
297 |
pdf = MarkdownPdf()
|
298 |
|
299 |
for slide in slides:
|
|
|
312 |
"""
|
313 |
pdf.add_section(Section(markdown_content, toc=False))
|
314 |
|
315 |
+
pdf_file = os.path.join(OUTPUT_DIR, "slides.pdf")
|
316 |
+
try:
|
317 |
+
pdf.save(pdf_file)
|
318 |
+
logger.info("Generated PDF slides (portrait): %s", pdf_file)
|
319 |
+
return pdf_file
|
320 |
+
except Exception as e:
|
321 |
+
logger.error("Failed to generate PDF: %s", str(e))
|
322 |
+
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
323 |
|
324 |
# Async function to update audio preview
|
325 |
async def update_audio_preview(audio_file):
|
|
|
331 |
# Async function to generate lecture materials and audio
|
332 |
async def on_generate(api_service, api_key, serpapi_key, title, topic, instructions, lecture_type, speaker_audio, num_slides):
|
333 |
if not serpapi_key:
|
334 |
+
yield f"""
|
335 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
336 |
+
<h2 style="color: #d9534f;">SerpApi key required</h2>
|
337 |
+
<p style="margin-top: 20px;">Please provide a valid SerpApi key and try again.</p>
|
338 |
+
</div>
|
339 |
+
"""
|
340 |
return
|
341 |
|
342 |
+
# Ensure output directory exists
|
343 |
+
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
344 |
+
logger.info("Output directory set to: %s", OUTPUT_DIR)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
|
346 |
+
# Initialize TTS model
|
347 |
+
tts = None
|
348 |
+
try:
|
349 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
350 |
+
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
351 |
+
logger.info("TTS model initialized on %s", device)
|
352 |
+
except Exception as e:
|
353 |
+
logger.error("Failed to initialize TTS model: %s", str(e))
|
354 |
+
yield f"""
|
355 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
356 |
+
<h2 style="color: #d9534f;">TTS model initialization failed</h2>
|
357 |
+
<p style="margin-top: 20px;">Error: {str(e)}</p>
|
358 |
+
<p>Please ensure the Coqui TTS model is properly installed and try again.</p>
|
359 |
+
</div>
|
360 |
+
"""
|
361 |
+
return
|
362 |
+
|
363 |
+
model_client = get_model_client(api_service, api_key)
|
364 |
+
|
365 |
+
research_agent = AssistantAgent(
|
366 |
+
name="research_agent",
|
367 |
+
model_client=model_client,
|
368 |
+
handoffs=["slide_agent"],
|
369 |
+
system_message="You are a Research Agent. Use the search_web tool to gather information on the topic and keywords from the initial message. Summarize the findings concisely in a single message, then use the handoff_to_slide_agent tool to pass the task to the Slide Agent. Do not produce any other output.",
|
370 |
+
tools=[search_web]
|
371 |
+
)
|
372 |
+
slide_agent = AssistantAgent(
|
373 |
+
name="slide_agent",
|
374 |
+
model_client=model_client,
|
375 |
+
handoffs=["script_agent"],
|
376 |
+
system_message=f"""
|
377 |
+
You are a Slide Agent. Using the research from the conversation history, generate EXACTLY {num_slides} content slides, plus 1 quiz slide, 1 assignment slide, and 1 thank-you slide, for a TOTAL of {num_slides + 3} slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, with each slide as an object with 'title' and 'content' keys. Ensure the JSON is valid, contains EXACTLY {num_slides + 3} slides, and matches the specified count before proceeding. Do not include explanatory text, comments, or other messages. After outputting, use the handoff_to_script_agent tool.
|
378 |
Example for 2 content slides:
|
379 |
```json
|
380 |
[
|
|
|
385 |
{{"title": "Thank You", "content": "Thank you message"}}
|
386 |
]
|
387 |
```""",
|
388 |
+
output_content_type=None,
|
389 |
+
reflect_on_tool_use=False
|
390 |
+
)
|
391 |
+
script_agent = AssistantAgent(
|
392 |
+
name="script_agent",
|
393 |
+
model_client=model_client,
|
394 |
+
handoffs=["feynman_agent"],
|
395 |
+
system_message=f"""
|
396 |
You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {num_slides + 3} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
|
397 |
Example for 1 content slide:
|
398 |
```json
|
|
|
403 |
"Thanks for, um, attending today!"
|
404 |
]
|
405 |
```""",
|
406 |
+
output_content_type=None,
|
407 |
+
reflect_on_tool_use=False
|
408 |
+
)
|
409 |
+
feynman_agent = AssistantAgent(
|
410 |
+
name="feynman_agent",
|
411 |
+
model_client=model_client,
|
412 |
+
handoffs=[],
|
413 |
+
system_message=f"""
|
414 |
+
You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that EXACTLY {num_slides + 3} slides and {num_slides + 3} scripts are received. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing, invalid, or do not match the expected count ({num_slides + 3}), report the issue clearly. Use 'TERMINATE' to signal completion.
|
415 |
Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture is coherent. TERMINATE'
|
416 |
""")
|
417 |
+
|
418 |
+
swarm = Swarm(
|
419 |
+
participants=[research_agent, slide_agent, script_agent, feynman_agent],
|
420 |
+
termination_condition=HandoffTermination(target="user") | TextMentionTermination("TERMINATE")
|
421 |
+
)
|
422 |
+
|
423 |
+
progress = 0
|
424 |
+
label = "Research: in progress..."
|
425 |
+
yield html_with_progress(label, progress)
|
426 |
+
await asyncio.sleep(0.1)
|
427 |
+
|
428 |
+
initial_message = f"""
|
429 |
+
Lecture Title: {title}
|
430 |
+
Topic: {topic}
|
431 |
+
Additional Instructions: {instructions}
|
432 |
+
Audience: {lecture_type}
|
433 |
+
Number of Content Slides: {num_slides}
|
434 |
+
Please start by researching the topic.
|
435 |
+
"""
|
436 |
+
logger.info("Starting lecture generation for topic: %s", topic)
|
437 |
+
|
438 |
+
slides = None
|
439 |
+
scripts = None
|
440 |
+
error_html = """
|
441 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
442 |
+
<h2 style="color: #d9534f;">Failed to generate lecture materials</h2>
|
443 |
+
<p style="margin-top: 20px;">Please try again with different parameters or a different model.</p>
|
444 |
+
</div>
|
445 |
+
"""
|
446 |
+
|
447 |
+
try:
|
448 |
max_slide_retries = 2
|
449 |
slide_retry_count = 0
|
450 |
|
451 |
while slide_retry_count <= max_slide_retries:
|
452 |
+
logger.info("Slide generation attempt %d/%d", slide_retry_count + 1, max_slide_retries)
|
453 |
+
task_result = await Console(swarm.run_stream(task=initial_message))
|
454 |
+
logger.info("Swarm execution completed")
|
455 |
+
|
456 |
+
script_retry_count = 0
|
457 |
+
max_script_retries = 2
|
458 |
+
|
459 |
+
for message in task_result.messages:
|
460 |
+
source = getattr(message, 'source', getattr(message, 'sender', None))
|
461 |
+
logger.debug("Processing message from %s, type: %s, content: %s", source, type(message), message.to_text() if hasattr(message, 'to_text') else str(message))
|
462 |
|
463 |
+
if isinstance(message, HandoffMessage):
|
464 |
+
logger.info("Handoff from %s to %s", source, message.target)
|
465 |
+
if source == "research_agent" and message.target == "slide_agent":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
466 |
progress = 25
|
467 |
label = "Slides: generating..."
|
468 |
yield html_with_progress(label, progress)
|
469 |
await asyncio.sleep(0.1)
|
470 |
+
elif source == "slide_agent" and message.target == "script_agent":
|
471 |
+
if slides is None:
|
472 |
+
logger.warning("Slide Agent handoff without slides JSON")
|
473 |
+
extracted_json = extract_json_from_message(message)
|
474 |
+
if extracted_json:
|
475 |
+
slides = extracted_json
|
476 |
+
logger.info("Extracted slides JSON from HandoffMessage context: %s", slides)
|
477 |
+
if slides is None:
|
478 |
+
label = "Slides: failed to generate..."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
479 |
yield html_with_progress(label, progress)
|
480 |
await asyncio.sleep(0.1)
|
481 |
+
progress = 50
|
482 |
+
label = "Scripts: generating..."
|
483 |
+
yield html_with_progress(label, progress)
|
484 |
+
await asyncio.sleep(0.1)
|
485 |
+
elif source == "script_agent" and message.target == "feynman_agent":
|
486 |
+
if scripts is None:
|
487 |
+
logger.warning("Script Agent handoff without scripts JSON")
|
488 |
+
extracted_json = extract_json_from_message(message)
|
489 |
+
if extracted_json:
|
490 |
+
scripts = extracted_json
|
491 |
+
logger.info("Extracted scripts JSON from HandoffMessage context: %s", scripts)
|
492 |
+
progress = 75
|
493 |
+
label = "Review: in progress..."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
494 |
yield html_with_progress(label, progress)
|
495 |
await asyncio.sleep(0.1)
|
496 |
|
497 |
+
elif source == "research_agent" and isinstance(message, TextMessage) and "handoff_to_slide_agent" in message.content:
|
498 |
+
logger.info("Research Agent completed research")
|
499 |
+
progress = 25
|
500 |
+
label = "Slides: generating..."
|
501 |
+
yield html_with_progress(label, progress)
|
502 |
+
await asyncio.sleep(0.1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
503 |
|
504 |
+
elif source == "slide_agent" and isinstance(message, (TextMessage, StructuredMessage)):
|
505 |
+
logger.debug("Slide Agent message received: %s", message.to_text())
|
506 |
+
extracted_json = extract_json_from_message(message)
|
507 |
+
if extracted_json:
|
508 |
+
slides = extracted_json
|
509 |
+
logger.info("Slide Agent generated %d slides: %s", len(slides), slides)
|
510 |
+
# Save slide content to individual files
|
511 |
+
for i, slide in enumerate(slides):
|
512 |
+
content_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_content.txt")
|
513 |
+
try:
|
514 |
+
with open(content_file, "w", encoding="utf-8") as f:
|
515 |
+
f.write(slide["content"])
|
516 |
+
logger.info("Saved slide content to %s: %s", content_file, slide["content"])
|
517 |
+
except Exception as e:
|
518 |
+
logger.error("Error saving slide content to %s: %s", content_file, str(e))
|
519 |
+
progress = 50
|
520 |
+
label = "Scripts: generating..."
|
521 |
+
yield html_with_progress(label, progress)
|
522 |
+
await asyncio.sleep(0.1)
|
523 |
+
else:
|
524 |
+
logger.warning("No JSON extracted from slide_agent message: %s", message.to_text())
|
525 |
|
526 |
+
elif source == "script_agent" and isinstance(message, (TextMessage, StructuredMessage)):
|
527 |
+
logger.debug("Script Agent message received: %s", message.to_text())
|
528 |
+
extracted_json = extract_json_from_message(message)
|
529 |
+
if extracted_json:
|
530 |
+
scripts = extracted_json
|
531 |
+
logger.info("Script Agent generated scripts for %d slides: %s", len(scripts), scripts)
|
532 |
+
# Save raw scripts to individual files
|
533 |
+
for i, script in enumerate(scripts):
|
534 |
+
script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_raw_script.txt")
|
535 |
+
try:
|
536 |
+
with open(script_file, "w", encoding="utf-8") as f:
|
537 |
+
f.write(script)
|
538 |
+
logger.info("Saved raw script to %s: %s", script_file, script)
|
539 |
+
except Exception as e:
|
540 |
+
logger.error("Error saving raw script to %s: %s", script_file, str(e))
|
541 |
+
progress = 75
|
542 |
+
label = "Scripts generated and saved. Reviewing..."
|
543 |
+
yield html_with_progress(label, progress)
|
544 |
+
await asyncio.sleep(0.1)
|
545 |
+
else:
|
546 |
+
logger.warning("No JSON extracted from script_agent message: %s", message.to_text())
|
547 |
+
if script_retry_count < max_script_retries:
|
548 |
+
script_retry_count += 1
|
549 |
+
logger.info("Retrying script generation (attempt %d/%d)", script_retry_count, max_script_retries)
|
550 |
+
retry_message = TextMessage(
|
551 |
+
content="Please generate scripts for the slides as per your instructions.",
|
552 |
+
source="user",
|
553 |
+
recipient="script_agent"
|
554 |
+
)
|
555 |
+
task_result.messages.append(retry_message)
|
556 |
+
continue
|
557 |
|
558 |
+
elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
|
559 |
+
logger.info("Feynman Agent completed lecture review: %s", message.content)
|
560 |
+
progress = 90
|
561 |
+
label = "Lecture materials ready. Generating audio..."
|
562 |
+
yield html_with_progress(label, progress)
|
563 |
+
await asyncio.sleep(0.1)
|
564 |
+
|
565 |
+
# Validate slide count
|
566 |
+
expected_slide_count = num_slides + 3
|
567 |
+
if slides and len(slides) == expected_slide_count:
|
568 |
+
logger.info("Slide count validated: %d slides received", len(slides))
|
569 |
+
break
|
570 |
+
else:
|
571 |
+
logger.warning("Incorrect slide count: expected %d, got %d", expected_slide_count, len(slides) if slides else 0)
|
572 |
+
slide_retry_count += 1
|
573 |
+
slides = None
|
574 |
+
if slide_retry_count <= max_slide_retries:
|
575 |
+
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count + 1, max_slide_retries)
|
576 |
+
task_result = await Console(swarm.run_stream(task=initial_message))
|
577 |
+
else:
|
578 |
+
logger.error("Max slide retries reached")
|
579 |
+
yield f"""
|
580 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
581 |
+
<h2 style="color: #d9534f;">Incorrect number of slides</h2>
|
582 |
+
<p style="margin-top: 20px;">Expected {expected_slide_count} slides ({num_slides} content slides + quiz, assignment, thank-you), but generated {len(slides) if slides else 0}. Please try again with a different model.</p>
|
583 |
+
</div>
|
584 |
+
"""
|
585 |
return
|
586 |
+
|
587 |
+
logger.info("Slides state: %s", "Generated" if slides else "None")
|
588 |
+
logger.info("Scripts state: %s", "Generated" if scripts else "None")
|
589 |
+
if not slides or not scripts:
|
590 |
+
error_message = f"Failed to generate {'slides and scripts' if not slides and not scripts else 'slides' if not slides else 'scripts'}"
|
591 |
+
error_message += f". Received {len(slides) if slides else 0} slides and {len(scripts) if scripts else 0} scripts."
|
592 |
+
logger.error("%s", error_message)
|
593 |
+
logger.debug("Dumping all messages for debugging:")
|
594 |
+
for msg in task_result.messages:
|
595 |
+
source = getattr(msg, 'source', getattr(msg, 'sender', None))
|
596 |
+
logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
|
597 |
+
yield error_html
|
598 |
+
return
|
599 |
+
|
600 |
+
if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
|
601 |
+
logger.error("Scripts are not a list of strings: %s", scripts)
|
602 |
+
yield f"""
|
603 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
604 |
+
<h2 style="color: #d9534f;">Invalid script format</h2>
|
605 |
+
<p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
|
606 |
+
</div>
|
607 |
+
"""
|
608 |
+
return
|
609 |
+
|
610 |
+
if len(scripts) != expected_slide_count:
|
611 |
+
logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
|
612 |
+
yield f"""
|
613 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
614 |
+
<h2 style="color: #d9534f;">Mismatch in slides and scripts</h2>
|
615 |
+
<p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
|
616 |
+
</div>
|
617 |
+
"""
|
618 |
+
return
|
619 |
+
|
620 |
+
# Generate PDF from slides
|
621 |
+
try:
|
622 |
+
pdf_file = generate_slides_pdf(slides)
|
623 |
+
except Exception as e:
|
624 |
+
logger.error("PDF generation failed: %s", str(e))
|
625 |
+
yield f"""
|
626 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
627 |
+
<h2 style="color: #d9534f;">PDF generation failed</h2>
|
628 |
+
<p style="margin-top: 20px;">Error: {str(e)}</p>
|
629 |
+
<p>Please try again or check the lecture_generation.log for details.</p>
|
630 |
+
</div>
|
631 |
+
"""
|
632 |
+
return
|
633 |
+
|
634 |
+
audio_files = []
|
635 |
+
speaker_audio = speaker_audio if speaker_audio else "feynman.mp3"
|
636 |
+
validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
|
637 |
+
if not validated_speaker_wav:
|
638 |
+
logger.error("Invalid speaker audio after conversion, skipping TTS")
|
639 |
+
yield f"""
|
640 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
641 |
+
<h2 style="color: #d9534f;">Invalid speaker audio</h2>
|
642 |
+
<p style="margin-top: 20px;">Please upload a valid MP3 or WAV audio file and try again.</p>
|
643 |
+
</div>
|
644 |
+
"""
|
645 |
+
return
|
646 |
+
|
647 |
+
# Process audio generation sequentially with retries
|
648 |
+
for i, script in enumerate(scripts):
|
649 |
+
cleaned_script = clean_script_text(script)
|
650 |
+
audio_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}.wav")
|
651 |
+
script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_script.txt")
|
652 |
+
|
653 |
+
# Save cleaned script
|
654 |
+
try:
|
655 |
+
with open(script_file, "w", encoding="utf-8") as f:
|
656 |
+
f.write(cleaned_script or "")
|
657 |
+
logger.info("Saved cleaned script to %s: %s", script_file, cleaned_script)
|
658 |
+
except Exception as e:
|
659 |
+
logger.error("Error saving cleaned script to %s: %s", script_file, str(e))
|
660 |
+
|
661 |
+
if not cleaned_script:
|
662 |
+
logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
|
663 |
+
audio_files.append(None)
|
664 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10
|
665 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
666 |
+
yield html_with_progress(label, progress)
|
667 |
+
await asyncio.sleep(0.1)
|
668 |
+
continue
|
669 |
+
|
670 |
+
max_retries = 2
|
671 |
+
for attempt in range(max_retries + 1):
|
672 |
+
try:
|
673 |
+
current_text = cleaned_script
|
674 |
+
if attempt > 0:
|
675 |
+
sentences = re.split(r"[.!?]+", cleaned_script)
|
676 |
+
sentences = [s.strip() for s in sentences if s.strip()][:2]
|
677 |
+
current_text = ". ".join(sentences) + "."
|
678 |
+
logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
|
679 |
|
680 |
+
success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
|
681 |
+
if not success:
|
682 |
+
raise RuntimeError("TTS generation failed")
|
|
|
|
|
|
|
|
|
683 |
|
684 |
+
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
685 |
+
audio_files.append(audio_file)
|
686 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10
|
687 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
688 |
+
yield html_with_progress(label, progress)
|
689 |
+
await asyncio.sleep(0.1)
|
690 |
+
break
|
691 |
+
except Exception as e:
|
692 |
+
logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
|
693 |
+
if attempt == max_retries:
|
694 |
+
logger.error("Max retries reached for slide %d, skipping", i + 1)
|
695 |
audio_files.append(None)
|
696 |
progress = 90 + ((i + 1) / len(scripts)) * 10
|
697 |
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
698 |
yield html_with_progress(label, progress)
|
699 |
await asyncio.sleep(0.1)
|
700 |
+
break
|
701 |
+
|
702 |
+
# Prepare output HTML with gr.File for PDF and gr.FileExplorer for outputs
|
703 |
+
slides_info = json.dumps({"slides": [
|
704 |
+
{"title": slide["title"], "content": slide["content"]}
|
705 |
+
for slide in slides
|
706 |
+
], "audioFiles": audio_files})
|
707 |
+
|
708 |
+
html_output = f"""
|
709 |
+
<div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between; padding: 20px;">
|
710 |
+
<div style="flex: 1; overflow: auto;">
|
711 |
+
<h3>Lecture Slides</h3>
|
712 |
+
<p>Download or view the slides PDF below (opens in your browser's PDF viewer):</p>
|
713 |
+
<gradio-file value="{pdf_file}" label="Slides PDF" file_types=[".pdf"]></gradio-file>
|
714 |
+
<h3>Generated Files</h3>
|
715 |
+
<p>Explore all generated files (PDF, audio, scripts) in the output directory:</p>
|
716 |
+
<gradio-file-explorer glob="/data/outputs/*" label="Output Directory"></gradio-file-explorer>
|
717 |
+
</div>
|
718 |
+
<div style="padding: 20px;">
|
719 |
+
<div id="progress-bar" style="width: 100%; height: 5px; background-color: #ddd; border-radius: 2px; margin-bottom: 10px;">
|
720 |
+
<div id="progress-fill" style="width: {(1/len(slides)*100)}%; height: 100%; background-color: #4CAF50; border-radius: 2px;"></div>
|
721 |
+
</div>
|
722 |
+
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
723 |
+
<button onclick="prevSlide()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
|
724 |
+
<button onclick="togglePlay()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
|
725 |
+
<button onclick="nextSlide()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
726 |
</div>
|
727 |
+
<p id="slide-counter" style="text-align: center;">Slide 1 of {len(slides)}</p>
|
728 |
+
</div>
|
729 |
+
</div>
|
730 |
+
<script>
|
731 |
+
const lectureData = {slides_info};
|
732 |
+
let currentSlide = 0;
|
733 |
+
const totalSlides = lectureData.slides.length;
|
734 |
+
const slideCounter = document.getElementById('slide-counter');
|
735 |
+
const progressFill = document.getElementById('progress-fill');
|
736 |
+
let audioElements = [];
|
737 |
+
let currentAudio = null;
|
738 |
|
739 |
+
for (let i = 0; i < totalSlides; i++) {{
|
740 |
+
if (lectureData.audioFiles && lectureData.audioFiles[i]) {{
|
741 |
+
const audio = new Audio('/gradio_api/file=' + lectureData.audioFiles[i]);
|
742 |
+
audioElements.push(audio);
|
743 |
+
}} else {{
|
744 |
+
audioElements.push(null);
|
745 |
+
}}
|
746 |
+
}}
|
747 |
|
748 |
+
function updateSlide() {{
|
749 |
+
slideCounter.textContent = `Slide ${{currentSlide + 1}} of ${{totalSlides}}`;
|
750 |
+
progressFill.style.width = `${{(currentSlide + 1) / totalSlides * 100}}%`;
|
751 |
|
752 |
+
if (currentAudio) {{
|
753 |
+
currentAudio.pause();
|
754 |
+
currentAudio.currentTime = 0;
|
755 |
+
}}
|
756 |
|
757 |
+
if (audioElements[currentSlide]) {{
|
758 |
+
currentAudio = audioElements[currentSlide];
|
759 |
+
currentAudio.play().catch(e => console.error('Audio play failed:', e));
|
760 |
+
}} else {{
|
761 |
+
currentAudio = null;
|
762 |
+
}}
|
763 |
+
}}
|
764 |
|
765 |
+
function prevSlide() {{
|
766 |
+
if (currentSlide > 0) {{
|
767 |
+
currentSlide--;
|
768 |
+
updateSlide();
|
769 |
+
}}
|
770 |
+
}}
|
771 |
|
772 |
+
function nextSlide() {{
|
773 |
+
if (currentSlide < totalSlides - 1) {{
|
774 |
+
currentSlide++;
|
775 |
+
updateSlide();
|
776 |
+
}}
|
777 |
+
}}
|
778 |
|
779 |
+
function togglePlay() {{
|
780 |
+
if (!audioElements[currentSlide]) return;
|
781 |
+
if (currentAudio.paused) {{
|
782 |
+
currentAudio.play().catch(e => console.error('Audio play failed:', e));
|
783 |
+
}} else {{
|
784 |
+
currentAudio.pause();
|
785 |
+
}}
|
786 |
+
}}
|
787 |
|
788 |
+
audioElements.forEach((audio, index) => {{
|
789 |
+
if (audio) {{
|
790 |
+
audio.addEventListener('ended', () => {{
|
791 |
+
if (index < totalSlides - 1) {{
|
792 |
+
nextSlide();
|
|
|
|
|
793 |
}}
|
794 |
}});
|
795 |
+
}}
|
796 |
+
}});
|
797 |
+
</script>
|
798 |
+
"""
|
799 |
+
logger.info("Lecture generation completed successfully")
|
800 |
+
yield html_output
|
801 |
+
|
802 |
+
except Exception as e:
|
803 |
+
logger.error("Error during lecture generation: %s\n%s", str(e), traceback.format_exc())
|
804 |
+
yield f"""
|
805 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
806 |
+
<h2 style="color: #d9534f;">Error during lecture generation</h2>
|
807 |
+
<p style="margin-top: 10px; font-size: 16px;">{str(e)}</p>
|
808 |
+
<p style="margin-top: 20px;">Please try again or check the lecture_generation.log for details.</p>
|
809 |
+
</div>
|
810 |
+
"""
|
811 |
+
return
|
812 |
|
813 |
# Gradio interface
|
814 |
with gr.Blocks(title="Agent Feynman") as demo:
|
|
|
843 |
</div>
|
844 |
"""
|
845 |
slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html)
|
|
|
|
|
846 |
|
847 |
speaker_audio.change(
|
848 |
fn=update_audio_preview,
|
|
|
853 |
generate_btn.click(
|
854 |
fn=on_generate,
|
855 |
inputs=[api_service, api_key, serpapi_key, title, topic, instructions, lecture_type, speaker_audio, num_slides],
|
856 |
+
outputs=[slide_display]
|
857 |
)
|
858 |
|
859 |
if __name__ == "__main__":
|