Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -16,11 +16,14 @@ from autogen_agentchat.messages import TextMessage, HandoffMessage, StructuredMe
|
|
16 |
from autogen_ext.models.anthropic import AnthropicChatCompletionClient
|
17 |
from autogen_ext.models.openai import OpenAIChatCompletionClient
|
18 |
from autogen_ext.models.ollama import OllamaChatCompletionClient
|
|
|
|
|
19 |
import traceback
|
20 |
import soundfile as sf
|
21 |
import tempfile
|
22 |
from pydub import AudioSegment
|
23 |
from TTS.api import TTS
|
|
|
24 |
|
25 |
# Set up logging
|
26 |
logging.basicConfig(
|
@@ -34,18 +37,17 @@ logging.basicConfig(
|
|
34 |
logger = logging.getLogger(__name__)
|
35 |
|
36 |
# Set up environment
|
37 |
-
OUTPUT_DIR = os.path.join(os.getcwd(), "outputs")
|
38 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
39 |
logger.info(f"Using output directory: {OUTPUT_DIR}")
|
40 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
41 |
|
42 |
-
# Initialize TTS model
|
43 |
-
|
44 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
45 |
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
46 |
logger.info("TTS model initialized on %s", device)
|
47 |
|
48 |
-
# Define
|
49 |
class Slide(BaseModel):
|
50 |
title: str
|
51 |
content: str
|
@@ -53,7 +55,7 @@ class Slide(BaseModel):
|
|
53 |
class SlidesOutput(BaseModel):
|
54 |
slides: list[Slide]
|
55 |
|
56 |
-
#
|
57 |
def search_web(query: str, serpapi_key: str) -> str:
|
58 |
try:
|
59 |
params = {
|
@@ -88,18 +90,68 @@ def search_web(query: str, serpapi_key: str) -> str:
|
|
88 |
logger.error("Unexpected error during search: %s", str(e))
|
89 |
return None
|
90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
# Define helper function for progress HTML
|
92 |
def html_with_progress(label, progress):
|
93 |
return f"""
|
94 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
95 |
-
<div style="width: 70%; background-color:
|
96 |
<div style="width: {progress}%; height: 15px; background-color: #4CAF50; border-radius: 80px;"></div>
|
97 |
</div>
|
98 |
<h2 style="font-style: italic; color: #555;">{label}</h2>
|
99 |
</div>
|
100 |
"""
|
101 |
|
102 |
-
#
|
103 |
def get_model_client(service, api_key):
|
104 |
if service == "OpenAI-gpt-4o-2024-08-06":
|
105 |
return OpenAIChatCompletionClient(model="gpt-4o-2024-08-06", api_key=api_key)
|
@@ -109,6 +161,19 @@ def get_model_client(service, api_key):
|
|
109 |
return OpenAIChatCompletionClient(model="gemini-1.5-flash", api_key=api_key)
|
110 |
elif service == "Ollama-llama3.2":
|
111 |
return OllamaChatCompletionClient(model="llama3.2")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
else:
|
113 |
raise ValueError("Invalid service")
|
114 |
|
@@ -292,61 +357,38 @@ def extract_json_from_message(message):
|
|
292 |
logger.warning("Unsupported message type for JSON extraction: %s", type(message))
|
293 |
return None
|
294 |
|
295 |
-
#
|
296 |
-
def generate_markdown_slides(slides, title, speaker="Prof. AI Feynman", date="April 26th, 2025"):
|
297 |
-
try:
|
298 |
-
markdown_slides = []
|
299 |
-
for i, slide in enumerate(slides):
|
300 |
-
slide_number = i + 1
|
301 |
-
content = slide['content']
|
302 |
-
|
303 |
-
# First slide has no header/footer, others have header and footer
|
304 |
-
if i == 0:
|
305 |
-
slide_md = f"""
|
306 |
-
# {slide['title']}
|
307 |
-
{content}
|
308 |
-
|
309 |
-
**{speaker}**
|
310 |
-
*{date}*
|
311 |
-
"""
|
312 |
-
else:
|
313 |
-
slide_md = f"""
|
314 |
-
##### Slide {slide_number}, {slide['title']}
|
315 |
-
{content}
|
316 |
-
|
317 |
-
, {title} {speaker}, {date}
|
318 |
-
"""
|
319 |
-
markdown_slides.append(slide_md.strip())
|
320 |
-
|
321 |
-
logger.info(f"Generated Markdown slides for: {title}: {markdown_slides}")
|
322 |
-
return markdown_slides
|
323 |
-
except Exception as e:
|
324 |
-
logger.error(f"Failed to generate Markdown slides: {str(e)}")
|
325 |
-
logger.error(traceback.format_exc())
|
326 |
-
return None
|
327 |
-
|
328 |
-
# Async function to update audio preview
|
329 |
async def update_audio_preview(audio_file):
|
330 |
if audio_file:
|
331 |
logger.info("Updating audio preview for file: %s", audio_file)
|
332 |
return audio_file
|
333 |
return None
|
334 |
|
335 |
-
#
|
336 |
-
def
|
337 |
zip_path = os.path.join(OUTPUT_DIR, "lecture_files.zip")
|
338 |
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
|
339 |
-
for file_path in
|
340 |
if os.path.exists(file_path):
|
341 |
-
|
|
|
|
|
|
|
342 |
logger.info("Created zip file: %s", zip_path)
|
343 |
return zip_path
|
344 |
|
345 |
-
#
|
|
|
|
|
|
|
|
|
|
|
346 |
async def on_generate(api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides):
|
347 |
model_client = get_model_client(api_service, api_key)
|
348 |
|
349 |
-
|
|
|
|
|
350 |
|
351 |
research_agent = AssistantAgent(
|
352 |
name="research_agent",
|
@@ -360,14 +402,23 @@ async def on_generate(api_service, api_key, serpapi_key, title, lecture_content_
|
|
360 |
model_client=model_client,
|
361 |
handoffs=["script_agent"],
|
362 |
system_message=f"""
|
363 |
-
You are a Slide Agent. Using the research from the conversation history and the specified number of slides ({
|
364 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
365 |
```json
|
366 |
[
|
367 |
-
{{"title": "
|
368 |
-
{{"title": "Slide
|
|
|
369 |
]
|
370 |
```""",
|
|
|
371 |
output_content_type=None,
|
372 |
reflect_on_tool_use=False
|
373 |
)
|
@@ -376,13 +427,18 @@ Example output for 2 slides:
|
|
376 |
model_client=model_client,
|
377 |
handoffs=["feynman_agent"],
|
378 |
system_message=f"""
|
379 |
-
You are a Script Agent. Access the JSON array of {total_slides} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a clear, academically inclined tone as a professor would deliver it. Avoid using non-verbal fillers such as "um," "you know," or "like." Output ONLY a JSON array wrapped in ```json ... ``` with exactly {total_slides} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
|
380 |
-
|
|
|
|
|
|
|
|
|
|
|
381 |
```json
|
382 |
[
|
383 |
-
"
|
384 |
-
"
|
385 |
-
"
|
386 |
]
|
387 |
```""",
|
388 |
output_content_type=None,
|
@@ -393,8 +449,8 @@ Example for 3 slides:
|
|
393 |
model_client=model_client,
|
394 |
handoffs=[],
|
395 |
system_message=f"""
|
396 |
-
You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {total_slides} slides and {total_slides} scripts are received. Output a confirmation message summarizing the number of slides and
|
397 |
-
Example: 'Received {total_slides} slides
|
398 |
""")
|
399 |
|
400 |
swarm = Swarm(
|
@@ -406,7 +462,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
406 |
label = "Research: in progress..."
|
407 |
yield (
|
408 |
html_with_progress(label, progress),
|
409 |
-
[]
|
410 |
)
|
411 |
await asyncio.sleep(0.1)
|
412 |
|
@@ -414,13 +470,14 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
414 |
Lecture Title: {title}
|
415 |
Lecture Content Description: {lecture_content_description}
|
416 |
Audience: {lecture_type}
|
417 |
-
Number of Slides: {
|
418 |
Please start by researching the topic, or proceed without research if search is unavailable.
|
419 |
"""
|
420 |
-
logger.info("Starting lecture generation for title: %s with %d slides", title, total_slides)
|
421 |
|
422 |
slides = None
|
423 |
scripts = None
|
|
|
424 |
error_html = """
|
425 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
426 |
<h2 style="color: #d9534f;">Failed to generate lecture materials</h2>
|
@@ -452,7 +509,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
452 |
label = "Slides: generating..."
|
453 |
yield (
|
454 |
html_with_progress(label, progress),
|
455 |
-
[]
|
456 |
)
|
457 |
await asyncio.sleep(0.1)
|
458 |
elif source == "slide_agent" and message.target == "script_agent":
|
@@ -467,7 +524,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
467 |
slide_retry_count += 1
|
468 |
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
|
469 |
retry_message = TextMessage(
|
470 |
-
content=f"Please generate exactly {total_slides} slides as per your instructions.",
|
471 |
source="user",
|
472 |
recipient="slide_agent"
|
473 |
)
|
@@ -477,7 +534,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
477 |
label = "Scripts: generating..."
|
478 |
yield (
|
479 |
html_with_progress(label, progress),
|
480 |
-
[]
|
481 |
)
|
482 |
await asyncio.sleep(0.1)
|
483 |
elif source == "script_agent" and message.target == "feynman_agent":
|
@@ -491,7 +548,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
491 |
label = "Review: in progress..."
|
492 |
yield (
|
493 |
html_with_progress(label, progress),
|
494 |
-
[]
|
495 |
)
|
496 |
await asyncio.sleep(0.1)
|
497 |
|
@@ -501,7 +558,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
501 |
label = "Slides: generating..."
|
502 |
yield (
|
503 |
html_with_progress(label, progress),
|
504 |
-
[]
|
505 |
)
|
506 |
await asyncio.sleep(0.1)
|
507 |
|
@@ -516,25 +573,21 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
516 |
slide_retry_count += 1
|
517 |
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
|
518 |
retry_message = TextMessage(
|
519 |
-
content=f"Please generate exactly {total_slides} slides as per your instructions.",
|
520 |
source="user",
|
521 |
recipient="slide_agent"
|
522 |
)
|
523 |
task_result.messages.append(retry_message)
|
524 |
continue
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
f.write(slide["content"])
|
530 |
-
logger.info("Saved slide content to %s", content_file)
|
531 |
-
except Exception as e:
|
532 |
-
logger.error("Error saving slide content to %s: %s", content_file, str(e))
|
533 |
progress = 50
|
534 |
label = "Scripts: generating..."
|
535 |
yield (
|
536 |
html_with_progress(label, progress),
|
537 |
-
[]
|
538 |
)
|
539 |
await asyncio.sleep(0.1)
|
540 |
else:
|
@@ -543,7 +596,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
543 |
slide_retry_count += 1
|
544 |
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
|
545 |
retry_message = TextMessage(
|
546 |
-
content=f"Please generate exactly {total_slides} slides as per your instructions.",
|
547 |
source="user",
|
548 |
recipient="slide_agent"
|
549 |
)
|
@@ -568,7 +621,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
568 |
label = "Scripts generated and saved. Reviewing..."
|
569 |
yield (
|
570 |
html_with_progress(label, progress),
|
571 |
-
[]
|
572 |
)
|
573 |
await asyncio.sleep(0.1)
|
574 |
else:
|
@@ -588,20 +641,18 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
588 |
logger.info("Feynman Agent completed lecture review: %s", message.content)
|
589 |
progress = 90
|
590 |
label = "Lecture materials ready. Generating audio..."
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
txt_file_paths = [os.path.join(OUTPUT_DIR, f) for f in txt_files]
|
595 |
-
zip_file = create_zip_of_txt_files(txt_file_paths)
|
596 |
yield (
|
597 |
html_with_progress(label, progress),
|
598 |
-
|
599 |
-
zip_file
|
600 |
)
|
601 |
await asyncio.sleep(0.1)
|
602 |
|
603 |
logger.info("Slides state: %s", "Generated" if slides else "None")
|
604 |
logger.info("Scripts state: %s", "Generated" if scripts else "None")
|
|
|
605 |
if not slides or not scripts:
|
606 |
error_message = f"Failed to generate {'slides and scripts' if not slides and not scripts else 'slides' if not slides else 'scripts'}"
|
607 |
error_message += f". Received {len(slides) if slides else 0} slides and {len(scripts) if scripts else 0} scripts."
|
@@ -612,7 +663,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
612 |
logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
|
613 |
yield (
|
614 |
error_html,
|
615 |
-
[]
|
616 |
)
|
617 |
return
|
618 |
|
@@ -625,7 +676,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
625 |
<p style="margin-top: 20px;">Expected {total_slides} slides, but generated {len(slides)}. Please try again.</p>
|
626 |
</div>
|
627 |
""",
|
628 |
-
[]
|
629 |
)
|
630 |
return
|
631 |
|
@@ -638,7 +689,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
638 |
<p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
|
639 |
</div>
|
640 |
""",
|
641 |
-
[]
|
642 |
)
|
643 |
return
|
644 |
|
@@ -651,192 +702,21 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
651 |
<p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
|
652 |
</div>
|
653 |
""",
|
654 |
-
[]
|
655 |
)
|
656 |
return
|
657 |
|
658 |
-
|
659 |
-
|
660 |
-
logger.error("Failed to generate Markdown slides")
|
661 |
-
yield (
|
662 |
-
f"""
|
663 |
-
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
664 |
-
<h2 style="color: #d9534f;">Failed to generate slides</h2>
|
665 |
-
<p style="margin-top: 20px;">Please try again.</p>
|
666 |
-
</div>
|
667 |
-
""",
|
668 |
-
[], None
|
669 |
-
)
|
670 |
-
return
|
671 |
-
|
672 |
-
# Generate initial audio timeline with placeholders
|
673 |
audio_urls = [None] * len(scripts)
|
674 |
audio_timeline = ""
|
675 |
for i in range(len(scripts)):
|
676 |
audio_timeline += f'<audio id="audio-{i+1}" controls src="" style="display: inline-block; margin: 0 10px; width: 200px;"><span>Loading...</span></audio>'
|
677 |
|
678 |
-
|
679 |
-
|
680 |
-
|
681 |
-
txt_file_paths = [os.path.join(OUTPUT_DIR, f) for f in txt_files]
|
682 |
-
zip_file = create_zip_of_txt_files(txt_file_paths)
|
683 |
-
|
684 |
-
# Yield the lecture materials immediately after slides and scripts are ready
|
685 |
-
slides_info = json.dumps({"slides": markdown_slides, "audioFiles": audio_urls})
|
686 |
-
html_output = f"""
|
687 |
-
<div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
|
688 |
-
<div id="slide-content" style="flex: 1; overflow: auto; padding: 20px; text-align: center; background-color: #fff; color: #333;"></div>
|
689 |
-
<div style="padding: 20px; text-align: center;">
|
690 |
-
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
691 |
-
{audio_timeline}
|
692 |
-
</div>
|
693 |
-
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
694 |
-
<button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
|
695 |
-
<button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
|
696 |
-
<button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
|
697 |
-
<button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">☐</button>
|
698 |
-
</div>
|
699 |
-
</div>
|
700 |
-
</div>
|
701 |
-
<script>
|
702 |
-
const lectureData = {slides_info};
|
703 |
-
let currentSlide = 0;
|
704 |
-
const totalSlides = lectureData.slides.length;
|
705 |
-
let audioElements = [];
|
706 |
-
let isPlaying = false;
|
707 |
-
|
708 |
-
// Populate audio elements
|
709 |
-
for (let i = 0; i < totalSlides; i++) {{
|
710 |
-
const audio = document.getElementById(`audio-${{i+1}}`);
|
711 |
-
audioElements.push(audio);
|
712 |
-
}}
|
713 |
-
|
714 |
-
function renderSlide() {{
|
715 |
-
const slideContent = document.getElementById('slide-content');
|
716 |
-
if (lectureData.slides[currentSlide]) {{
|
717 |
-
// Since the content is already Markdown-rendered by Gradio, we can set it directly
|
718 |
-
slideContent.innerHTML = lectureData.slides[currentSlide].replace(/\\n/g, '<br>');
|
719 |
-
console.log("Rendering slide:", lectureData.slides[currentSlide]);
|
720 |
-
}} else {{
|
721 |
-
slideContent.innerHTML = '<h2>No slide content available</h2>';
|
722 |
-
console.log("No slide content for index:", currentSlide);
|
723 |
-
}}
|
724 |
-
}}
|
725 |
-
|
726 |
-
function updateSlide() {{
|
727 |
-
renderSlide();
|
728 |
-
audioElements.forEach(audio => {{
|
729 |
-
if (audio && audio.pause) {{
|
730 |
-
audio.pause();
|
731 |
-
audio.currentTime = 0;
|
732 |
-
}}
|
733 |
-
}});
|
734 |
-
}}
|
735 |
-
|
736 |
-
function updateAudioSources(audioUrls) {{
|
737 |
-
audioUrls.forEach((url, index) => {{
|
738 |
-
const audio = audioElements[index];
|
739 |
-
if (audio && url && audio.src !== url) {{
|
740 |
-
audio.src = url;
|
741 |
-
audio.load(); // Force reload the audio element
|
742 |
-
console.log(`Updated audio-${{index+1}} src to:`, url);
|
743 |
-
}}
|
744 |
-
}});
|
745 |
-
}}
|
746 |
-
|
747 |
-
function prevSlide() {{
|
748 |
-
if (currentSlide > 0) {{
|
749 |
-
currentSlide--;
|
750 |
-
updateSlide();
|
751 |
-
const audio = audioElements[currentSlide];
|
752 |
-
if (audio && audio.play && isPlaying) {{
|
753 |
-
audio.play().catch(e => console.error('Audio play failed:', e));
|
754 |
-
}}
|
755 |
-
}}
|
756 |
-
}}
|
757 |
-
|
758 |
-
function nextSlide() {{
|
759 |
-
if (currentSlide < totalSlides - 1) {{
|
760 |
-
currentSlide++;
|
761 |
-
updateSlide();
|
762 |
-
const audio = audioElements[currentSlide];
|
763 |
-
if (audio && audio.play && isPlaying) {{
|
764 |
-
audio.play().catch(e => console.error('Audio play failed:', e));
|
765 |
-
}}
|
766 |
-
}}
|
767 |
-
}}
|
768 |
-
|
769 |
-
function playAll() {{
|
770 |
-
isPlaying = !isPlaying;
|
771 |
-
const playBtn = document.getElementById('play-btn');
|
772 |
-
playBtn.textContent = isPlaying ? '⏸' : '⏯';
|
773 |
-
if (!isPlaying) {{
|
774 |
-
audioElements.forEach(audio => {{
|
775 |
-
if (audio && audio.pause) {{
|
776 |
-
audio.pause();
|
777 |
-
audio.currentTime = 0;
|
778 |
-
}}
|
779 |
-
}});
|
780 |
-
return;
|
781 |
-
}}
|
782 |
-
let index = currentSlide;
|
783 |
-
function playNext() {{
|
784 |
-
if (index >= totalSlides || !isPlaying) {{
|
785 |
-
isPlaying = false;
|
786 |
-
playBtn.textContent = '⏯';
|
787 |
-
return;
|
788 |
-
}}
|
789 |
-
currentSlide = index;
|
790 |
-
updateSlide();
|
791 |
-
const audio = audioElements[index];
|
792 |
-
if (audio && audio.play) {{
|
793 |
-
audio.play().then(() => {{
|
794 |
-
audio.addEventListener('ended', () => {{
|
795 |
-
index++;
|
796 |
-
playNext();
|
797 |
-
}}, {{ once: true }});
|
798 |
-
}}).catch(e => {{
|
799 |
-
console.error('Audio play failed:', e);
|
800 |
-
index++;
|
801 |
-
playNext();
|
802 |
-
}});
|
803 |
-
}} else {{
|
804 |
-
index++;
|
805 |
-
playNext();
|
806 |
-
}}
|
807 |
-
}}
|
808 |
-
playNext();
|
809 |
-
}}
|
810 |
-
|
811 |
-
function toggleFullScreen() {{
|
812 |
-
const container = document.getElementById('lecture-container');
|
813 |
-
if (!document.fullscreenElement) {{
|
814 |
-
container.requestFullscreen().catch(err => {{
|
815 |
-
console.error('Error attempting to enable full-screen mode:', err);
|
816 |
-
}});
|
817 |
-
}} else {{
|
818 |
-
document.exitFullscreen();
|
819 |
-
}}
|
820 |
-
}}
|
821 |
-
|
822 |
-
// Attach event listeners
|
823 |
-
document.getElementById('prev-btn').addEventListener('click', prevSlide);
|
824 |
-
document.getElementById('play-btn').addEventListener('click', playAll);
|
825 |
-
document.getElementById('next-btn').addEventListener('click', nextSlide);
|
826 |
-
document.getElementById('fullscreen-btn').addEventListener('click', toggleFullScreen);
|
827 |
-
|
828 |
-
// Initialize first slide
|
829 |
-
renderSlide();
|
830 |
-
</script>
|
831 |
-
"""
|
832 |
-
logger.info("Yielding lecture materials before audio generation")
|
833 |
-
yield (
|
834 |
-
html_output,
|
835 |
-
txt_file_paths,
|
836 |
-
zip_file
|
837 |
-
)
|
838 |
|
839 |
-
# Now generate audio files progressively
|
840 |
audio_files = []
|
841 |
validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
|
842 |
if not validated_speaker_wav:
|
@@ -848,7 +728,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
848 |
<p style="margin-top: 20px;">Please upload a valid MP3 or WAV audio file and try again.</p>
|
849 |
</div>
|
850 |
""",
|
851 |
-
[]
|
852 |
)
|
853 |
return
|
854 |
|
@@ -869,11 +749,10 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
869 |
audio_files.append(None)
|
870 |
audio_urls[i] = None
|
871 |
progress = 90 + ((i + 1) / len(scripts)) * 10
|
872 |
-
label = f"
|
873 |
yield (
|
874 |
-
|
875 |
-
|
876 |
-
zip_file
|
877 |
)
|
878 |
await asyncio.sleep(0.1)
|
879 |
continue
|
@@ -894,171 +773,13 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
894 |
|
895 |
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
896 |
audio_files.append(audio_file)
|
897 |
-
audio_urls[i] =
|
898 |
progress = 90 + ((i + 1) / len(scripts)) * 10
|
899 |
-
label = f"
|
900 |
-
|
901 |
-
# Update audio timeline with the new audio URL
|
902 |
-
audio_timeline = ""
|
903 |
-
for j, url in enumerate(audio_urls):
|
904 |
-
if url:
|
905 |
-
audio_timeline += f'<audio id="audio-{j+1}" controls src="{url}" style="display: inline-block; margin: 0 10px; width: 200px;"></audio>'
|
906 |
-
else:
|
907 |
-
audio_timeline += f'<audio id="audio-{j+1}" controls src="" style="display: inline-block; margin: 0 10px; width: 200px;"><span>Loading...</span></audio>'
|
908 |
-
|
909 |
-
html_output = f"""
|
910 |
-
<div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
|
911 |
-
<div id="slide-content" style="flex: 1; overflow: auto; padding: 20px; text-align: center; background-color: #fff; color: #333;"></div>
|
912 |
-
<div style="padding: 20px; text-align: center;">
|
913 |
-
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
914 |
-
{audio_timeline}
|
915 |
-
</div>
|
916 |
-
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
917 |
-
<button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
|
918 |
-
<button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
|
919 |
-
<button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
|
920 |
-
<button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">☐</button>
|
921 |
-
</div>
|
922 |
-
</div>
|
923 |
-
</div>
|
924 |
-
<script>
|
925 |
-
const lectureData = {slides_info};
|
926 |
-
let currentSlide = 0;
|
927 |
-
const totalSlides = lectureData.slides.length;
|
928 |
-
let audioElements = [];
|
929 |
-
let isPlaying = false;
|
930 |
-
|
931 |
-
// Populate audio elements
|
932 |
-
for (let i = 0; i < totalSlides; i++) {{
|
933 |
-
const audio = document.getElementById(`audio-${{i+1}}`);
|
934 |
-
audioElements.push(audio);
|
935 |
-
}}
|
936 |
-
|
937 |
-
// Update audio sources dynamically
|
938 |
-
lectureData.audioFiles = {json.dumps(audio_urls)};
|
939 |
-
updateAudioSources(lectureData.audioFiles);
|
940 |
-
|
941 |
-
function renderSlide() {{
|
942 |
-
const slideContent = document.getElementById('slide-content');
|
943 |
-
if (lectureData.slides[currentSlide]) {{
|
944 |
-
slideContent.innerHTML = lectureData.slides[currentSlide].replace(/\\n/g, '<br>');
|
945 |
-
console.log("Rendering slide:", lectureData.slides[currentSlide]);
|
946 |
-
}} else {{
|
947 |
-
slideContent.innerHTML = '<h2>No slide content available</h2>';
|
948 |
-
console.log("No slide content for index:", currentSlide);
|
949 |
-
}}
|
950 |
-
}}
|
951 |
-
|
952 |
-
function updateSlide() {{
|
953 |
-
renderSlide();
|
954 |
-
audioElements.forEach(audio => {{
|
955 |
-
if (audio && audio.pause) {{
|
956 |
-
audio.pause();
|
957 |
-
audio.currentTime = 0;
|
958 |
-
}}
|
959 |
-
}});
|
960 |
-
}}
|
961 |
-
|
962 |
-
function updateAudioSources(audioUrls) {{
|
963 |
-
audioUrls.forEach((url, index) => {{
|
964 |
-
const audio = audioElements[index];
|
965 |
-
if (audio && url && audio.src !== url) {{
|
966 |
-
audio.src = url;
|
967 |
-
audio.load();
|
968 |
-
console.log(`Updated audio-${{index+1}} src to:`, url);
|
969 |
-
}}
|
970 |
-
}});
|
971 |
-
}}
|
972 |
-
|
973 |
-
function prevSlide() {{
|
974 |
-
if (currentSlide > 0) {{
|
975 |
-
currentSlide--;
|
976 |
-
updateSlide();
|
977 |
-
const audio = audioElements[currentSlide];
|
978 |
-
if (audio && audio.play && isPlaying) {{
|
979 |
-
audio.play().catch(e => console.error('Audio play failed:', e));
|
980 |
-
}}
|
981 |
-
}}
|
982 |
-
}}
|
983 |
-
|
984 |
-
function nextSlide() {{
|
985 |
-
if (currentSlide < totalSlides - 1) {{
|
986 |
-
currentSlide++;
|
987 |
-
updateSlide();
|
988 |
-
const audio = audioElements[currentSlide];
|
989 |
-
if (audio && audio.play && isPlaying) {{
|
990 |
-
audio.play().catch(e => console.error('Audio play failed:', e));
|
991 |
-
}}
|
992 |
-
}}
|
993 |
-
}}
|
994 |
-
|
995 |
-
function playAll() {{
|
996 |
-
isPlaying = !isPlaying;
|
997 |
-
const playBtn = document.getElementById('play-btn');
|
998 |
-
playBtn.textContent = isPlaying ? '⏸' : '⏯';
|
999 |
-
if (!isPlaying) {{
|
1000 |
-
audioElements.forEach(audio => {{
|
1001 |
-
if (audio && audio.pause) {{
|
1002 |
-
audio.pause();
|
1003 |
-
audio.currentTime = 0;
|
1004 |
-
}}
|
1005 |
-
}});
|
1006 |
-
return;
|
1007 |
-
}}
|
1008 |
-
let index = currentSlide;
|
1009 |
-
function playNext() {{
|
1010 |
-
if (index >= totalSlides || !isPlaying) {{
|
1011 |
-
isPlaying = false;
|
1012 |
-
playBtn.textContent = '⏯';
|
1013 |
-
return;
|
1014 |
-
}}
|
1015 |
-
currentSlide = index;
|
1016 |
-
updateSlide();
|
1017 |
-
const audio = audioElements[index];
|
1018 |
-
if (audio && audio.play) {{
|
1019 |
-
audio.play().then(() => {{
|
1020 |
-
audio.addEventListener('ended', () => {{
|
1021 |
-
index++;
|
1022 |
-
playNext();
|
1023 |
-
}}, {{ once: true }});
|
1024 |
-
}}).catch(e => {{
|
1025 |
-
console.error('Audio play failed:', e);
|
1026 |
-
index++;
|
1027 |
-
playNext();
|
1028 |
-
}});
|
1029 |
-
}} else {{
|
1030 |
-
index++;
|
1031 |
-
playNext();
|
1032 |
-
}}
|
1033 |
-
}}
|
1034 |
-
playNext();
|
1035 |
-
}}
|
1036 |
-
|
1037 |
-
function toggleFullScreen() {{
|
1038 |
-
const container = document.getElementById('lecture-container');
|
1039 |
-
if (!document.fullscreenElement) {{
|
1040 |
-
container.requestFullscreen().catch(err => {{
|
1041 |
-
console.error('Error attempting to enable full-screen mode:', err);
|
1042 |
-
}});
|
1043 |
-
}} else {{
|
1044 |
-
document.exitFullscreen();
|
1045 |
-
}}
|
1046 |
-
}}
|
1047 |
-
|
1048 |
-
// Attach event listeners
|
1049 |
-
document.getElementById('prev-btn').addEventListener('click', prevSlide);
|
1050 |
-
document.getElementById('play-btn').addEventListener('click', playAll);
|
1051 |
-
document.getElementById('next-btn').addEventListener('click', nextSlide);
|
1052 |
-
document.getElementById('fullscreen-btn').addEventListener('click', toggleFullScreen);
|
1053 |
-
|
1054 |
-
// Initialize first slide
|
1055 |
-
renderSlide();
|
1056 |
-
</script>
|
1057 |
-
"""
|
1058 |
yield (
|
1059 |
-
|
1060 |
-
|
1061 |
-
zip_file
|
1062 |
)
|
1063 |
await asyncio.sleep(0.1)
|
1064 |
break
|
@@ -1069,15 +790,52 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
1069 |
audio_files.append(None)
|
1070 |
audio_urls[i] = None
|
1071 |
progress = 90 + ((i + 1) / len(scripts)) * 10
|
1072 |
-
label = f"
|
1073 |
yield (
|
1074 |
-
|
1075 |
-
|
1076 |
-
zip_file
|
1077 |
)
|
1078 |
await asyncio.sleep(0.1)
|
1079 |
break
|
1080 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1081 |
logger.info("Lecture generation completed successfully")
|
1082 |
|
1083 |
except Exception as e:
|
@@ -1090,16 +848,379 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
1090 |
<p style="margin-top: 20px;">Please try again or adjust your inputs.</p>
|
1091 |
</div>
|
1092 |
""",
|
1093 |
-
[]
|
1094 |
)
|
1095 |
return
|
1096 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1097 |
# Gradio interface
|
1098 |
-
with gr.Blocks(
|
1099 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1100 |
with gr.Row():
|
1101 |
with gr.Column(scale=1):
|
1102 |
-
with gr.Group():
|
1103 |
title = gr.Textbox(label="Lecture Title", placeholder="e.g. Introduction to AI")
|
1104 |
lecture_content_description = gr.Textbox(label="Lecture Content Description", placeholder="e.g. Focus on recent advancements")
|
1105 |
lecture_type = gr.Dropdown(["Conference", "University", "High school"], label="Audience", value="University")
|
@@ -1108,26 +1229,26 @@ with gr.Blocks(title="Agent Feynman") as demo:
|
|
1108 |
"OpenAI-gpt-4o-2024-08-06",
|
1109 |
"Anthropic-claude-3-sonnet-20240229",
|
1110 |
"Google-gemini-1.5-flash",
|
1111 |
-
"Ollama-llama3.2"
|
|
|
1112 |
],
|
1113 |
label="Model",
|
1114 |
value="Google-gemini-1.5-flash"
|
1115 |
)
|
1116 |
-
api_key = gr.Textbox(label="Model Provider API Key", type="password", placeholder="Not required for Ollama")
|
1117 |
-
serpapi_key = gr.Textbox(label="SerpApi Key", type="password", placeholder="Enter your SerpApi key (optional)")
|
1118 |
-
num_slides = gr.Slider(1, 20, step=1, label="Number of Slides", value=3)
|
1119 |
-
speaker_audio = gr.Audio(label="Speaker sample
|
1120 |
generate_btn = gr.Button("Generate Lecture")
|
1121 |
with gr.Column(scale=2):
|
1122 |
default_slide_html = """
|
1123 |
-
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius:
|
1124 |
-
<h2 style="font-style: italic; color: #
|
1125 |
<p style="margin-top: 10px; font-size: 16px;">Please Generate lecture content via the form on the left first before lecture begins</p>
|
1126 |
</div>
|
1127 |
"""
|
1128 |
-
slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html)
|
1129 |
-
file_output = gr.File(label="Download
|
1130 |
-
zip_output = gr.File(label="Download All Files as ZIP")
|
1131 |
|
1132 |
speaker_audio.change(
|
1133 |
fn=update_audio_preview,
|
@@ -1138,7 +1259,7 @@ with gr.Blocks(title="Agent Feynman") as demo:
|
|
1138 |
generate_btn.click(
|
1139 |
fn=on_generate,
|
1140 |
inputs=[api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides],
|
1141 |
-
outputs=[slide_display, file_output
|
1142 |
)
|
1143 |
|
1144 |
if __name__ == "__main__":
|
|
|
16 |
from autogen_ext.models.anthropic import AnthropicChatCompletionClient
|
17 |
from autogen_ext.models.openai import OpenAIChatCompletionClient
|
18 |
from autogen_ext.models.ollama import OllamaChatCompletionClient
|
19 |
+
from autogen_ext.models.azure import AzureAIChatCompletionClient
|
20 |
+
from azure.core.credentials import AzureKeyCredential
|
21 |
import traceback
|
22 |
import soundfile as sf
|
23 |
import tempfile
|
24 |
from pydub import AudioSegment
|
25 |
from TTS.api import TTS
|
26 |
+
import markdown
|
27 |
|
28 |
# Set up logging
|
29 |
logging.basicConfig(
|
|
|
37 |
logger = logging.getLogger(__name__)
|
38 |
|
39 |
# Set up environment
|
40 |
+
OUTPUT_DIR = os.path.join(os.getcwd(), "outputs")
|
41 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
42 |
logger.info(f"Using output directory: {OUTPUT_DIR}")
|
43 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
44 |
|
45 |
+
# Initialize TTS model
|
|
|
46 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
47 |
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
48 |
logger.info("TTS model initialized on %s", device)
|
49 |
|
50 |
+
# Define model for slide data
|
51 |
# Pydantic model describing a single slide: a title and its content.
class Slide(BaseModel):
    title: str  # title text of the slide
    content: str  # content text of the slide
|
|
|
55 |
# Pydantic wrapper model holding a list of Slide entries.
class SlidesOutput(BaseModel):
    slides: list[Slide]  # the Slide objects that make up the output
|
57 |
|
58 |
+
# Search web tool using SerpApi
|
59 |
def search_web(query: str, serpapi_key: str) -> str:
|
60 |
try:
|
61 |
params = {
|
|
|
90 |
logger.error("Unexpected error during search: %s", str(e))
|
91 |
return None
|
92 |
|
93 |
+
# Custom function to render Markdown to HTML
|
94 |
+
def render_md_to_html(md_content: str) -> str:
    """Convert a Markdown string into an HTML fragment.

    Args:
        md_content: Markdown source text to render.

    Returns:
        The rendered HTML, or a fixed error ``<div>`` if rendering raises
        (the failure is logged).
    """
    enabled_extensions = ['extra', 'fenced_code', 'tables']
    try:
        return markdown.markdown(md_content, extensions=enabled_extensions)
    except Exception as err:
        # Best effort: a slide that fails to render must not abort the lecture.
        logger.error("Failed to render Markdown to HTML: %s", str(err))
        return "<div>Error rendering content</div>"
|
101 |
+
|
102 |
+
# Define create_slides tool for generating HTML slides
|
103 |
+
def create_slides(slides: list[dict], title: str, output_dir: str = OUTPUT_DIR) -> list[str]:
    """Render each slide into an HTML file based on ``slide_template.html``.

    For every slide the Markdown ``content`` is converted to HTML, the
    template placeholders are filled in, and both ``slide_<n>.html`` and the
    raw Markdown ``slide_<n>_content.md`` are written to ``output_dir``.

    Args:
        slides: Slide dicts, each with ``'title'`` and ``'content'`` keys.
        title: Lecture title inserted into every slide.
        output_dir: Directory that receives the generated files.

    Returns:
        Paths of the generated HTML files in slide order, or an empty list
        if anything fails (the error is logged).
    """
    import re  # local import so the block does not depend on file-level imports

    try:
        template_file = os.path.join(os.getcwd(), "slide_template.html")
        with open(template_file, "r", encoding="utf-8") as f:
            template_content = f.read()

        # A caller-supplied directory may not exist yet.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # All placeholders are substituted in ONE pass over the template.
        # Chained str.replace calls would rescan text that was just inserted,
        # so a title or slide body containing e.g. "date" (as in "update")
        # or "speaker name" would be corrupted by a later replacement.
        placeholders = (
            "<!--SLIDE_NUMBER-->",
            "section title",
            "Lecture title",
            "<!--CONTENT-->",
            "speaker name",
            "date",
        )
        placeholder_pattern = re.compile("|".join(re.escape(p) for p in placeholders))

        html_files = []
        for slide_number, slide in enumerate(slides, start=1):
            md_content = slide['content']
            replacements = {
                "<!--SLIDE_NUMBER-->": str(slide_number),
                "section title": f"Slide {slide_number}, {slide['title']}",
                "Lecture title": title,
                "<!--CONTENT-->": render_md_to_html(md_content),
                "speaker name": "Prof. AI Feynman",
                "date": "May 2nd, 2025",
            }
            # A callable replacement avoids backslash/group-reference
            # interpretation of the inserted text.
            slide_html = placeholder_pattern.sub(
                lambda match, values=replacements: values[match.group(0)],
                template_content,
            )

            html_file = os.path.join(output_dir, f"slide_{slide_number}.html")
            with open(html_file, "w", encoding="utf-8") as f:
                f.write(slide_html)
            logger.info("Generated HTML slide: %s", html_file)
            html_files.append(html_file)

            # Save slide content as Markdown files
            md_file = os.path.join(output_dir, f"slide_{slide_number}_content.md")
            with open(md_file, "w", encoding="utf-8") as f:
                f.write(md_content)
            logger.info("Saved slide content to Markdown: %s", md_file)

        return html_files

    except Exception as e:
        # Callers treat an empty list as "slide generation failed".
        logger.error("Failed to create HTML slides: %s", str(e))
        return []
|
142 |
+
|
143 |
# Define helper function for progress HTML
|
144 |
def html_with_progress(label, progress):
    """Build the HTML snippet showing a progress bar above a status label.

    Args:
        label: Status text displayed under the bar.
        progress: Value used as the CSS width percentage of the filled bar.

    Returns:
        The HTML markup as a string.
    """
    markup = f"""
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
<div style="width: 70%; background-color: lightgrey; border-radius: 80px; overflow: hidden; margin-bottom: 20px;">
<div style="width: {progress}%; height: 15px; background-color: #4CAF50; border-radius: 80px;"></div>
</div>
<h2 style="font-style: italic; color: #555;">{label}</h2>
</div>
"""
    return markup
|
153 |
|
154 |
+
# Get model client based on selected service
|
155 |
def get_model_client(service, api_key):
|
156 |
if service == "OpenAI-gpt-4o-2024-08-06":
|
157 |
return OpenAIChatCompletionClient(model="gpt-4o-2024-08-06", api_key=api_key)
|
|
|
161 |
return OpenAIChatCompletionClient(model="gemini-1.5-flash", api_key=api_key)
|
162 |
elif service == "Ollama-llama3.2":
|
163 |
return OllamaChatCompletionClient(model="llama3.2")
|
164 |
+
elif service == "Azure AI Foundry":
|
165 |
+
return AzureAIChatCompletionClient(
|
166 |
+
model="phi-4",
|
167 |
+
endpoint="https://models.inference.ai.azure.com",
|
168 |
+
credential=AzureKeyCredential(os.environ.get("GITHUB_TOKEN", "")),
|
169 |
+
model_info={
|
170 |
+
"json_output": False,
|
171 |
+
"function_calling": False,
|
172 |
+
"vision": False,
|
173 |
+
"family": "unknown",
|
174 |
+
"structured_output": False,
|
175 |
+
}
|
176 |
+
)
|
177 |
else:
|
178 |
raise ValueError("Invalid service")
|
179 |
|
|
|
357 |
logger.warning("Unsupported message type for JSON extraction: %s", type(message))
|
358 |
return None
|
359 |
|
360 |
+
# Async update audio preview
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
361 |
async def update_audio_preview(audio_file):
    """Return the given audio file unchanged, or ``None`` when nothing is set.

    Args:
        audio_file: The audio value passed in by the caller; may be empty.

    Returns:
        ``audio_file`` when it is truthy, otherwise ``None``.
    """
    if not audio_file:
        return None
    logger.info("Updating audio preview for file: %s", audio_file)
    return audio_file
|
366 |
|
367 |
+
# Create a zip file of .md, .txt, and .mp3 files
|
368 |
+
def create_zip_of_files(file_paths):
    """Bundle the existing .md, .txt and .mp3 files into ``lecture_files.zip``.

    Args:
        file_paths: Paths to consider; missing files and other extensions
            are skipped.

    Returns:
        Path of the zip archive written inside ``OUTPUT_DIR``.
    """
    allowed_extensions = ['.md', '.txt', '.mp3']
    zip_path = os.path.join(OUTPUT_DIR, "lecture_files.zip")
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        for candidate in file_paths:
            if not os.path.exists(candidate):
                continue
            suffix = os.path.splitext(candidate)[1]
            if suffix not in allowed_extensions:
                continue
            # Store entries flat, by file name only.
            archive.write(candidate, os.path.basename(candidate))
            logger.info("Added %s to zip", candidate)
    logger.info("Created zip file: %s", zip_path)
    return zip_path
|
379 |
|
380 |
+
# Access local files
|
381 |
+
def get_gradio_file_url(local_path):
    """Build the Gradio file URL path for a file on local disk.

    The path is made relative to the current working directory and prefixed
    with ``/gradio_api/file=``.

    Args:
        local_path: Path of the file to expose.

    Returns:
        A string of the form ``/gradio_api/file=<relative path>`` using
        forward slashes as separators.
    """
    relative_path = os.path.relpath(local_path, os.getcwd())
    # URLs use forward slashes; os.path.relpath yields backslashes on Windows.
    url_path = relative_path.replace(os.sep, "/")
    return f"/gradio_api/file={url_path}"
|
384 |
+
|
385 |
+
# Async generate lecture materials and audio
|
386 |
async def on_generate(api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides):
|
387 |
model_client = get_model_client(api_service, api_key)
|
388 |
|
389 |
+
# Total slides include user-specified content slides plus Introduction and Closing slides
|
390 |
+
content_slides = num_slides
|
391 |
+
total_slides = content_slides + 2
|
392 |
|
393 |
research_agent = AssistantAgent(
|
394 |
name="research_agent",
|
|
|
402 |
model_client=model_client,
|
403 |
handoffs=["script_agent"],
|
404 |
system_message=f"""
|
405 |
+
You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({content_slides}), generate exactly {content_slides} content slides, plus an Introduction slide as the first slide and a Closing slide as the last slide, making a total of {total_slides} slides.
|
406 |
+
|
407 |
+
- The Introduction slide (first slide) should have the title "Introduction to {title}" and content containing only the lecture title, speaker name (Prof. AI Feynman), and date (May 2nd, 2025), centered, in plain text.
|
408 |
+
- The Closing slide (last slide) should have the title "Closing" and content containing only "The End\nThank you", centered, in plain text.
|
409 |
+
- The remaining {content_slides} slides should be content slides based on the lecture description and audience type, with meaningful titles and content in valid Markdown format.
|
410 |
+
|
411 |
+
Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. After generating the JSON, use the create_slides tool to produce HTML slides, then use the handoff_to_script_agent tool to pass the task to the Script Agent. Do not include any explanatory text or other messages.
|
412 |
+
|
413 |
+
Example output for 1 content slide (total 3 slides):
|
414 |
```json
|
415 |
[
|
416 |
+
{{"title": "Introduction to AI Basics", "content": "AI Basics\nProf. AI Feynman\nMay 2nd, 2025"}},
|
417 |
+
{{"title": "Slide 1: What is AI?", "content": "# What is AI?\n- Definition: Systems that mimic human intelligence\n- Key areas: ML, NLP, Robotics"}},
|
418 |
+
{{"title": "Closing", "content": "The End\nThank you"}}
|
419 |
]
|
420 |
```""",
|
421 |
+
tools=[create_slides],
|
422 |
output_content_type=None,
|
423 |
reflect_on_tool_use=False
|
424 |
)
|
|
|
427 |
model_client=model_client,
|
428 |
handoffs=["feynman_agent"],
|
429 |
system_message=f"""
|
430 |
+
You are a Script Agent model after Richard Feynman. Access the JSON array of {total_slides} slides from the conversation history, which includes an Introduction slide, {content_slides} content slides, and a Closing slide. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a clear, academically inclined tone, with humour as a professor feynman would deliver it. Avoid using non-verbal fillers such as "um," "you know," or "like." Output ONLY a JSON array wrapped in ```json ... ``` with exactly {total_slides} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
|
431 |
+
|
432 |
+
- For the Introduction slide, the script should be a welcoming message introducing the lecture.
|
433 |
+
- For the Closing slide, the script should be a brief farewell and thank you message.
|
434 |
+
- For the content slides, summarize the slide content academically.
|
435 |
+
|
436 |
+
Example for 3 slides (1 content slide):
|
437 |
```json
|
438 |
[
|
439 |
+
"Welcome to the lecture on AI Basics. I am Professor AI Feynman, and today we will explore the fundamentals of artificial intelligence.",
|
440 |
+
"Let us begin by defining artificial intelligence: it refers to systems that mimic human intelligence, spanning key areas such as machine learning, natural language processing, and robotics.",
|
441 |
+
"That concludes our lecture on AI Basics. Thank you for your attention, and I hope you found this session insightful."
|
442 |
]
|
443 |
```""",
|
444 |
output_content_type=None,
|
|
|
449 |
model_client=model_client,
|
450 |
handoffs=[],
|
451 |
system_message=f"""
|
452 |
+
You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {total_slides} slides and {total_slides} scripts are received, including the Introduction and Closing slides. Verify that HTML slide files exist in the outputs directory. Output a confirmation message summarizing the number of slides, scripts, and HTML files status. If slides, scripts, or HTML files are missing, invalid, or do not match the expected count ({total_slides}), report the issue clearly. Use 'TERMINATE' to signal completion.
|
453 |
+
Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files. Lecture is coherent. TERMINATE'
|
454 |
""")
|
455 |
|
456 |
swarm = Swarm(
|
|
|
462 |
label = "Research: in progress..."
|
463 |
yield (
|
464 |
html_with_progress(label, progress),
|
465 |
+
[]
|
466 |
)
|
467 |
await asyncio.sleep(0.1)
|
468 |
|
|
|
470 |
Lecture Title: {title}
|
471 |
Lecture Content Description: {lecture_content_description}
|
472 |
Audience: {lecture_type}
|
473 |
+
Number of Content Slides: {content_slides}
|
474 |
Please start by researching the topic, or proceed without research if search is unavailable.
|
475 |
"""
|
476 |
+
logger.info("Starting lecture generation for title: %s with %d content slides (total %d slides)", title, content_slides, total_slides)
|
477 |
|
478 |
slides = None
|
479 |
scripts = None
|
480 |
+
html_files = []
|
481 |
error_html = """
|
482 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
483 |
<h2 style="color: #d9534f;">Failed to generate lecture materials</h2>
|
|
|
509 |
label = "Slides: generating..."
|
510 |
yield (
|
511 |
html_with_progress(label, progress),
|
512 |
+
[]
|
513 |
)
|
514 |
await asyncio.sleep(0.1)
|
515 |
elif source == "slide_agent" and message.target == "script_agent":
|
|
|
524 |
slide_retry_count += 1
|
525 |
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
|
526 |
retry_message = TextMessage(
|
527 |
+
content=f"Please generate exactly {total_slides} slides (Introduction, {content_slides} content slides, and Closing) as per your instructions.",
|
528 |
source="user",
|
529 |
recipient="slide_agent"
|
530 |
)
|
|
|
534 |
label = "Scripts: generating..."
|
535 |
yield (
|
536 |
html_with_progress(label, progress),
|
537 |
+
[]
|
538 |
)
|
539 |
await asyncio.sleep(0.1)
|
540 |
elif source == "script_agent" and message.target == "feynman_agent":
|
|
|
548 |
label = "Review: in progress..."
|
549 |
yield (
|
550 |
html_with_progress(label, progress),
|
551 |
+
[]
|
552 |
)
|
553 |
await asyncio.sleep(0.1)
|
554 |
|
|
|
558 |
label = "Slides: generating..."
|
559 |
yield (
|
560 |
html_with_progress(label, progress),
|
561 |
+
[]
|
562 |
)
|
563 |
await asyncio.sleep(0.1)
|
564 |
|
|
|
573 |
slide_retry_count += 1
|
574 |
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
|
575 |
retry_message = TextMessage(
|
576 |
+
content=f"Please generate exactly {total_slides} slides (Introduction, {content_slides} content slides, and Closing) as per your instructions.",
|
577 |
source="user",
|
578 |
recipient="slide_agent"
|
579 |
)
|
580 |
task_result.messages.append(retry_message)
|
581 |
continue
|
582 |
+
# Generate HTML slides
|
583 |
+
html_files = create_slides(slides, title)
|
584 |
+
if not html_files:
|
585 |
+
logger.error("Failed to generate HTML slides")
|
|
|
|
|
|
|
|
|
586 |
progress = 50
|
587 |
label = "Scripts: generating..."
|
588 |
yield (
|
589 |
html_with_progress(label, progress),
|
590 |
+
[]
|
591 |
)
|
592 |
await asyncio.sleep(0.1)
|
593 |
else:
|
|
|
596 |
slide_retry_count += 1
|
597 |
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
|
598 |
retry_message = TextMessage(
|
599 |
+
content=f"Please generate exactly {total_slides} slides (Introduction, {content_slides} content slides, and Closing) as per your instructions.",
|
600 |
source="user",
|
601 |
recipient="slide_agent"
|
602 |
)
|
|
|
621 |
label = "Scripts generated and saved. Reviewing..."
|
622 |
yield (
|
623 |
html_with_progress(label, progress),
|
624 |
+
[]
|
625 |
)
|
626 |
await asyncio.sleep(0.1)
|
627 |
else:
|
|
|
641 |
logger.info("Feynman Agent completed lecture review: %s", message.content)
|
642 |
progress = 90
|
643 |
label = "Lecture materials ready. Generating audio..."
|
644 |
+
file_paths = [f for f in os.listdir(OUTPUT_DIR) if f.endswith(('.md', '.txt'))]
|
645 |
+
file_paths.sort()
|
646 |
+
file_paths = [os.path.join(OUTPUT_DIR, f) for f in file_paths]
|
|
|
|
|
647 |
yield (
|
648 |
html_with_progress(label, progress),
|
649 |
+
file_paths
|
|
|
650 |
)
|
651 |
await asyncio.sleep(0.1)
|
652 |
|
653 |
logger.info("Slides state: %s", "Generated" if slides else "None")
|
654 |
logger.info("Scripts state: %s", "Generated" if scripts else "None")
|
655 |
+
logger.info("HTML files state: %s", "Generated" if html_files else "None")
|
656 |
if not slides or not scripts:
|
657 |
error_message = f"Failed to generate {'slides and scripts' if not slides and not scripts else 'slides' if not slides else 'scripts'}"
|
658 |
error_message += f". Received {len(slides) if slides else 0} slides and {len(scripts) if scripts else 0} scripts."
|
|
|
663 |
logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
|
664 |
yield (
|
665 |
error_html,
|
666 |
+
[]
|
667 |
)
|
668 |
return
|
669 |
|
|
|
676 |
<p style="margin-top: 20px;">Expected {total_slides} slides, but generated {len(slides)}. Please try again.</p>
|
677 |
</div>
|
678 |
""",
|
679 |
+
[]
|
680 |
)
|
681 |
return
|
682 |
|
|
|
689 |
<p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
|
690 |
</div>
|
691 |
""",
|
692 |
+
[]
|
693 |
)
|
694 |
return
|
695 |
|
|
|
702 |
<p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
|
703 |
</div>
|
704 |
""",
|
705 |
+
[]
|
706 |
)
|
707 |
return
|
708 |
|
709 |
+
# Access the generated HTML files
|
710 |
+
html_file_urls = [get_gradio_file_url(html_file) for html_file in html_files]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
711 |
audio_urls = [None] * len(scripts)
|
712 |
audio_timeline = ""
|
713 |
for i in range(len(scripts)):
|
714 |
audio_timeline += f'<audio id="audio-{i+1}" controls src="" style="display: inline-block; margin: 0 10px; width: 200px;"><span>Loading...</span></audio>'
|
715 |
|
716 |
+
file_paths = [f for f in os.listdir(OUTPUT_DIR) if f.endswith(('.md', '.txt'))]
|
717 |
+
file_paths.sort()
|
718 |
+
file_paths = [os.path.join(OUTPUT_DIR, f) for f in file_paths]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
719 |
|
|
|
720 |
audio_files = []
|
721 |
validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
|
722 |
if not validated_speaker_wav:
|
|
|
728 |
<p style="margin-top: 20px;">Please upload a valid MP3 or WAV audio file and try again.</p>
|
729 |
</div>
|
730 |
""",
|
731 |
+
[]
|
732 |
)
|
733 |
return
|
734 |
|
|
|
749 |
audio_files.append(None)
|
750 |
audio_urls[i] = None
|
751 |
progress = 90 + ((i + 1) / len(scripts)) * 10
|
752 |
+
label = f"Generating audio for slide {i + 1}/{len(scripts)}..."
|
753 |
yield (
|
754 |
+
html_with_progress(label, progress),
|
755 |
+
file_paths
|
|
|
756 |
)
|
757 |
await asyncio.sleep(0.1)
|
758 |
continue
|
|
|
773 |
|
774 |
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
775 |
audio_files.append(audio_file)
|
776 |
+
audio_urls[i] = get_gradio_file_url(audio_file)
|
777 |
progress = 90 + ((i + 1) / len(scripts)) * 10
|
778 |
+
label = f"Generating audio for slide {i + 1}/{len(scripts)}..."
|
779 |
+
file_paths.append(audio_file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
780 |
yield (
|
781 |
+
html_with_progress(label, progress),
|
782 |
+
file_paths
|
|
|
783 |
)
|
784 |
await asyncio.sleep(0.1)
|
785 |
break
|
|
|
790 |
audio_files.append(None)
|
791 |
audio_urls[i] = None
|
792 |
progress = 90 + ((i + 1) / len(scripts)) * 10
|
793 |
+
label = f"Generating audio for slide {i + 1}/{len(scripts)}..."
|
794 |
yield (
|
795 |
+
html_with_progress(label, progress),
|
796 |
+
file_paths
|
|
|
797 |
)
|
798 |
await asyncio.sleep(0.1)
|
799 |
break
|
800 |
|
801 |
+
# Create zip file with all materials except .html files
|
802 |
+
zip_file = create_zip_of_files(file_paths)
|
803 |
+
file_paths.append(zip_file)
|
804 |
+
|
805 |
+
# Slide hack: Render the lecture container with iframe containing HTML slides
|
806 |
+
audio_timeline = ""
|
807 |
+
for j, url in enumerate(audio_urls):
|
808 |
+
if url:
|
809 |
+
audio_timeline += f'<audio id="audio-{j+1}" controls src="{url}" style="display: inline-block; margin: 0 10px; width: 200px;"></audio>'
|
810 |
+
else:
|
811 |
+
audio_timeline += f'<audio id="audio-{j+1}" controls src="" style="display: inline-block; margin: 0 10px; width: 200px;"><span>Audio unavailable</span></audio>'
|
812 |
+
|
813 |
+
slides_info = json.dumps({"htmlFiles": html_file_urls, "audioFiles": audio_urls})
|
814 |
+
html_output = f"""
|
815 |
+
<div id="lecture-data" style="display: none;">{slides_info}</div>
|
816 |
+
<div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
|
817 |
+
<div id="slide-content" style="flex: 1; overflow: auto; padding: 20px; text-align: center; background-color: #fff;">
|
818 |
+
<iframe id="slide-iframe" style="width: 100%; height: 100%; border: none;"></iframe>
|
819 |
+
</div>
|
820 |
+
<div style="padding: 20px; text-align: center;">
|
821 |
+
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
822 |
+
{audio_timeline}
|
823 |
+
</div>
|
824 |
+
<div style="display: center; justify-content: center; margin-bottom: 10px;">
|
825 |
+
<button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;"><i class="fas fa-step-backward"></i></button>
|
826 |
+
<button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;"><i class="fas fa-play"></i></button>
|
827 |
+
<button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;"><i class="fas fa-step-forward"></i></button>
|
828 |
+
<button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;"><i class="fas fa-expand"></i></button>
|
829 |
+
</div>
|
830 |
+
</div>
|
831 |
+
</div>
|
832 |
+
"""
|
833 |
+
logger.info("Yielding final lecture materials after audio generation")
|
834 |
+
yield (
|
835 |
+
html_output,
|
836 |
+
file_paths
|
837 |
+
)
|
838 |
+
|
839 |
logger.info("Lecture generation completed successfully")
|
840 |
|
841 |
except Exception as e:
|
|
|
848 |
<p style="margin-top: 20px;">Please try again or adjust your inputs.</p>
|
849 |
</div>
|
850 |
""",
|
851 |
+
[]
|
852 |
)
|
853 |
return
|
854 |
|
855 |
+
# custom js for lecture container features
|
856 |
+
js_code = """
|
857 |
+
() => {
|
858 |
+
// Function to wait for an element to appear in the DOM
|
859 |
+
function waitForElement(selector, callback, maxAttempts = 50, interval = 100) {
|
860 |
+
let attempts = 0;
|
861 |
+
const intervalId = setInterval(() => {
|
862 |
+
const element = document.querySelector(selector);
|
863 |
+
if (element) {
|
864 |
+
clearInterval(intervalId);
|
865 |
+
console.log(`Element ${selector} found after ${attempts} attempts`);
|
866 |
+
callback(element);
|
867 |
+
} else if (attempts >= maxAttempts) {
|
868 |
+
clearInterval(intervalId);
|
869 |
+
console.error(`Element ${selector} not found after ${maxAttempts} attempts`);
|
870 |
+
}
|
871 |
+
attempts++;
|
872 |
+
}, interval);
|
873 |
+
}
|
874 |
+
|
875 |
+
// Main initialization function
|
876 |
+
function initializeSlides() {
|
877 |
+
console.log("Initializing slides...");
|
878 |
+
|
879 |
+
// Wait for lecture-data to load the JSON data
|
880 |
+
waitForElement('#lecture-data', (dataElement) => {
|
881 |
+
if (!dataElement.textContent) {
|
882 |
+
console.error("Lecture data element is empty");
|
883 |
+
return;
|
884 |
+
}
|
885 |
+
let lectureData;
|
886 |
+
try {
|
887 |
+
lectureData = JSON.parse(dataElement.textContent);
|
888 |
+
console.log("Lecture data parsed successfully:", lectureData);
|
889 |
+
} catch (e) {
|
890 |
+
console.error("Failed to parse lecture data:", e);
|
891 |
+
return;
|
892 |
+
}
|
893 |
+
|
894 |
+
if (!lectureData.htmlFiles || lectureData.htmlFiles.length === 0) {
|
895 |
+
console.error("No HTML files found in lecture data");
|
896 |
+
return;
|
897 |
+
}
|
898 |
+
|
899 |
+
let currentSlide = 0;
|
900 |
+
const totalSlides = lectureData.htmlFiles.length;
|
901 |
+
let audioElements = [];
|
902 |
+
let isPlaying = false;
|
903 |
+
let hasNavigated = false; // Track if user has used prev/next buttons
|
904 |
+
|
905 |
+
// Wait for slide-content element
|
906 |
+
waitForElement('#slide-content', (slideContent) => {
|
907 |
+
console.log("Slide content element found");
|
908 |
+
|
909 |
+
// Initialize audio elements
|
910 |
+
for (let i = 0; i < totalSlides; i++) {
|
911 |
+
const audio = document.getElementById(`audio-${i+1}`);
|
912 |
+
if (audio) {
|
913 |
+
audioElements.push(audio);
|
914 |
+
console.log(`Found audio element audio-${i+1}:`, audio);
|
915 |
+
} else {
|
916 |
+
console.error(`Audio element audio-${i+1} not found`);
|
917 |
+
}
|
918 |
+
}
|
919 |
+
|
920 |
+
function renderSlide() {
|
921 |
+
console.log("Rendering slide:", currentSlide + 1);
|
922 |
+
if (currentSlide >= 0 && currentSlide < totalSlides && lectureData.htmlFiles[currentSlide]) {
|
923 |
+
const iframe = document.getElementById('slide-iframe');
|
924 |
+
if (iframe) {
|
925 |
+
iframe.src = lectureData.htmlFiles[currentSlide];
|
926 |
+
console.log("Set iframe src to:", lectureData.htmlFiles[currentSlide]);
|
927 |
+
// Adjust font size based on content length and screen size
|
928 |
+
waitForElement('iframe', (iframe) => {
|
929 |
+
iframe.onload = () => {
|
930 |
+
const doc = iframe.contentDocument || iframe.contentWindow.document;
|
931 |
+
const body = doc.body;
|
932 |
+
if (body) {
|
933 |
+
const textLength = body.textContent.length;
|
934 |
+
const screenWidth = window.innerWidth;
|
935 |
+
// Base font size: 12px max on large screens, scale down to 8px on small screens
|
936 |
+
let baseFontSize = Math.min(12, Math.max(8, 12 * (screenWidth / 1920))); // Scale with screen width (1920px as reference)
|
937 |
+
// Adjust inversely with content length
|
938 |
+
const adjustedFontSize = Math.max(8, baseFontSize * (1000 / (textLength + 100))); // Minimum 8px, scale down with length
|
939 |
+
const elements = body.getElementsByTagName('*');
|
940 |
+
for (let elem of elements) {
|
941 |
+
elem.style.fontSize = `${adjustedFontSize}px`;
|
942 |
+
}
|
943 |
+
console.log(`Adjusted font size to ${adjustedFontSize}px for ${textLength} characters on ${screenWidth}px width`);
|
944 |
+
}
|
945 |
+
};
|
946 |
+
});
|
947 |
+
} else {
|
948 |
+
console.error("Iframe not found");
|
949 |
+
}
|
950 |
+
} else {
|
951 |
+
const iframe = document.getElementById('slide-iframe');
|
952 |
+
if (iframe) {
|
953 |
+
iframe.src = "about:blank";
|
954 |
+
console.log("No valid slide content for index:", currentSlide);
|
955 |
+
}
|
956 |
+
}
|
957 |
+
}
|
958 |
+
|
959 |
+
function updateSlide(callback) {
|
960 |
+
console.log("Updating slide to index:", currentSlide);
|
961 |
+
renderSlide();
|
962 |
+
// Pause and reset all audio elements
|
963 |
+
audioElements.forEach(audio => {
|
964 |
+
if (audio && audio.pause) {
|
965 |
+
audio.pause();
|
966 |
+
audio.currentTime = 0;
|
967 |
+
audio.style.border = 'none'; // Reset border
|
968 |
+
console.log("Paused and reset audio:", audio.id);
|
969 |
+
}
|
970 |
+
});
|
971 |
+
// Wait briefly to ensure pause completes before proceeding
|
972 |
+
setTimeout(() => {
|
973 |
+
if (callback) callback();
|
974 |
+
}, 100);
|
975 |
+
}
|
976 |
+
|
977 |
+
function updateAudioSources(audioUrls) {
|
978 |
+
console.log("Updating audio sources:", audioUrls);
|
979 |
+
audioUrls.forEach((url, index) => {
|
980 |
+
const audio = audioElements[index];
|
981 |
+
if (audio && url && audio.src !== url) {
|
982 |
+
audio.src = url;
|
983 |
+
audio.load();
|
984 |
+
console.log(`Updated audio-${index+1} src to:`, url);
|
985 |
+
} else if (!audio) {
|
986 |
+
console.error(`Audio element at index ${index} not found`);
|
987 |
+
}
|
988 |
+
});
|
989 |
+
}
|
990 |
+
|
991 |
+
function prevSlide() {
|
992 |
+
console.log("Previous button clicked, current slide:", currentSlide);
|
993 |
+
hasNavigated = true; // User has navigated
|
994 |
+
if (currentSlide > 0) {
|
995 |
+
currentSlide--;
|
996 |
+
updateSlide(() => {
|
997 |
+
const audio = audioElements[currentSlide];
|
998 |
+
if (audio && audio.play && isPlaying) {
|
999 |
+
audio.style.border = '50px solid #50f150';
|
1000 |
+
audio.style.borderRadius = '50px';
|
1001 |
+
audio.play().catch(e => console.error('Audio play failed:', e));
|
1002 |
+
}
|
1003 |
+
});
|
1004 |
+
} else {
|
1005 |
+
console.log("Already at first slide");
|
1006 |
+
}
|
1007 |
+
}
|
1008 |
+
|
1009 |
+
// Click handler for the "next" button: advance one slide and, when playback
// is active, start the narration belonging to the slide that is now shown.
function nextSlide() {
    console.log("Next button clicked, current slide:", currentSlide);
    hasNavigated = true; // User has navigated

    const canAdvance = currentSlide < totalSlides - 1;
    if (!canAdvance) {
        console.log("Already at last slide");
        return;
    }

    currentSlide += 1;
    updateSlide(function resumeNarration() {
        const target = audioElements[currentSlide];
        const shouldPlay = Boolean(target && target.play && isPlaying);
        if (!shouldPlay) {
            return;
        }
        target.style.border = '2px solid lightgreen';
        target.play().catch((err) => console.error('Audio play failed:', err));
    });
}
|
1025 |
+
|
1026 |
+
// Click handler for the play/pause button.
//
// When the button shows the pause icon, playback is stopped and every audio
// element is reset. Otherwise the lecture is played from the first slide:
// each slide is rendered, its audio is played, and the next slide starts when
// that audio finishes (via the `ended` event, or a 1s polling fallback in case
// the event is never delivered).
//
// Each click starts a new "run". Callbacks that belong to an older run (timers,
// `ended` listeners, pending play() promises) detect that they are stale and
// do nothing, so pausing and restarting cannot leave two playback chains alive.
function playAll() {
    console.log("Play button clicked, isPlaying:", isPlaying);
    const playBtn = document.getElementById('play-btn');
    if (!playBtn) {
        console.error("Play button not found");
        return;
    }
    const playIcon = playBtn.querySelector('i');
    if (!playIcon) {
        console.error("Play button icon not found");
        return;
    }

    // Invalidate everything scheduled by earlier clicks.
    playAll.runToken = (playAll.runToken || 0) + 1;
    const runToken = playAll.runToken;
    const isStale = () => runToken !== playAll.runToken;

    const clearHighlights = () => {
        audioElements.forEach(audio => {
            if (audio) audio.style.border = 'none';
        });
    };

    if (playIcon.className.includes('fa-pause')) {
        // Pause playback
        isPlaying = false;
        audioElements.forEach(audio => {
            if (audio && audio.pause) {
                audio.pause();
                audio.currentTime = 0;
                audio.style.border = 'none';
                console.log("Paused audio:", audio.id);
            }
        });
        playIcon.className = 'fas fa-play';
        return;
    }

    // Start playback
    currentSlide = 0;
    let index = 0;
    isPlaying = true;
    playIcon.className = 'fas fa-pause';

    // Move on to the next slide exactly once when `audio` finishes, whichever
    // of the two end signals (event or polling fallback) arrives first.
    function advanceWhenEnded(audio, slideNumber) {
        let settled = false;
        let pollTimer = null;

        // Tear down both signals; returns false if already torn down.
        function settle() {
            if (settled) return false;
            settled = true;
            clearInterval(pollTimer);
            audio.removeEventListener('ended', onEnded);
            return true;
        }

        function onEnded() {
            if (!settle() || isStale()) return;
            console.log(`Audio ended for slide ${slideNumber}`);
            index++;
            playNext();
        }

        // Fallback: Check if audio is stuck (e.g., `ended` never fires)
        pollTimer = setInterval(() => {
            if (isStale() || !isPlaying) {
                settle();
                return;
            }
            if (audio.duration && audio.currentTime >= audio.duration - 0.1) {
                if (!settle()) return;
                console.log(`Fallback: Audio for slide ${slideNumber} considered ended`);
                index++;
                playNext();
            }
        }, 1000);
        audio.addEventListener('ended', onEnded);
    }

    function playNext() {
        if (isStale()) return;
        if (index >= totalSlides || !isPlaying) {
            isPlaying = false;
            playIcon.className = 'fas fa-play';
            clearHighlights();
            console.log("Finished playing all slides or paused");
            return;
        }
        currentSlide = index;
        updateSlide(() => {
            if (isStale()) return;
            const audio = audioElements[index];
            if (!(audio && audio.play)) {
                // No playable audio for this slide; skip it.
                index++;
                playNext();
                return;
            }
            // Highlight the current audio element
            clearHighlights();
            audio.style.border = '2px solid lightgreen';
            const slideNumber = index + 1;
            console.log(`Attempting to play audio for slide ${slideNumber}`);
            audio.play().then(() => {
                if (isStale()) return;
                console.log(`Playing audio for slide ${slideNumber}`);
                advanceWhenEnded(audio, slideNumber);
            }).catch(e => {
                console.error(`Audio play failed for slide ${slideNumber}:`, e);
                // Retry playing the same slide after a short delay
                setTimeout(() => {
                    // pause() rejects a pending play(); never restart audio
                    // the user has just paused.
                    if (isStale()) return;
                    if (!isPlaying) {
                        playNext(); // resets the button/highlight state
                        return;
                    }
                    audio.play().then(() => {
                        if (isStale()) return;
                        console.log(`Retry succeeded for slide ${slideNumber}`);
                        advanceWhenEnded(audio, slideNumber);
                    }).catch(err => {
                        console.error(`Retry failed for slide ${slideNumber}:`, err);
                        if (isStale()) return;
                        index++; // Move to next slide if retry fails
                        playNext();
                    });
                }, 500);
            });
        });
    }

    // Render the first slide (which also resets all audio) before starting.
    updateSlide(() => {
        playNext();
    });
}
|
1136 |
+
|
1137 |
+
// Click handler for the fullscreen button: put the lecture container into
// fullscreen, or leave fullscreen if some element is already fullscreen.
function toggleFullScreen() {
    console.log("Fullscreen button clicked");
    const container = document.getElementById('lecture-container');
    if (!container) {
        console.error("Lecture container not found");
        return;
    }
    if (!document.fullscreenElement) {
        // Browsers without the unprefixed API would otherwise throw a TypeError.
        if (typeof container.requestFullscreen !== 'function') {
            console.error('Error enabling full-screen:', 'Fullscreen API not supported');
            return;
        }
        container.requestFullscreen().catch(err => {
            console.error('Error enabling full-screen:', err);
        });
        return;
    }
    // exitFullscreen() is asynchronous: log only once it has actually
    // completed, and do not leave a rejection unhandled.
    document.exitFullscreen()
        .then(() => console.log("Exited fullscreen"))
        .catch(err => console.error('Error exiting full-screen:', err));
}
|
1153 |
+
|
1154 |
+
// Attach event listeners
|
1155 |
+
// Each control button id paired with its click handler, in wiring order.
const controlBindings = [
    ['prev-btn', prevSlide],
    ['play-btn', playAll],
    ['next-btn', nextSlide],
    ['fullscreen-btn', toggleFullScreen]
];
controlBindings.forEach(([buttonId, onClick]) => {
    // waitForElement hands the matched element to the callback.
    waitForElement(`#${buttonId}`, (button) => {
        button.addEventListener('click', onClick);
        console.log(`Attached event listener to ${buttonId}`);
    });
});

// Load the narration URLs, then show the starting slide.
updateAudioSources(lectureData.audioFiles);
renderSlide();
console.log("Initial slide rendered, starting at slide:", currentSlide + 1);
|
1179 |
+
});
|
1180 |
+
});
|
1181 |
+
}
|
1182 |
+
|
1183 |
+
// Observe DOM changes to detect when lecture container is added
|
1184 |
+
const observer = new MutationObserver((mutations) => {
    // Mutation records arrive in batches. Decide once per batch so that
    // initializeSlides() runs a single time: disconnecting inside a forEach
    // does not stop the loop, and a second call would attach duplicate
    // click handlers to the control buttons.
    const nodesWereAdded = mutations.some((mutation) => mutation.addedNodes.length > 0);
    if (!nodesWereAdded) {
        return;
    }
    const lectureContainer = document.getElementById('lecture-container');
    if (!lectureContainer) {
        return;
    }
    console.log("Lecture container detected in DOM");
    observer.disconnect(); // Stop observing once found
    initializeSlides();
});

// Start observing the document body for changes
observer.observe(document.body, { childList: true, subtree: true });
console.log("Started observing DOM for lecture container");
|
1200 |
+
}
|
1201 |
+
"""
|
1202 |
+
|
1203 |
# Gradio interface
|
1204 |
+
with gr.Blocks(
|
1205 |
+
title="Agent Feynman",
|
1206 |
+
css="""
|
1207 |
+
#lecture-container {font-family: 'Times New Roman', Times, serif;}
|
1208 |
+
#slide-content {font-size: 48px; line-height: 1.2;}
|
1209 |
+
#form-group {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; font-weight: 900; color: #000; background-color: white;}
|
1210 |
+
#download {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px;}
|
1211 |
+
#slide-display {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; background-color: white;}
|
1212 |
+
button {transition: background-color 0.3s;}
|
1213 |
+
button:hover {background-color: #e0e0e0;}
|
1214 |
+
""",
|
1215 |
+
js=js_code,
|
1216 |
+
head='<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">'
|
1217 |
+
) as demo:
|
1218 |
+
gr.Markdown("""
|
1219 |
+
# <center>Professor AI Feynman: A Multi-Agent Tool for Learning Anything the Feynman way.</center>
|
1220 |
+
## <center>(Jaward Sesay - Microsoft AI Agent Hackathon Submission)</center>""")
|
1221 |
with gr.Row():
|
1222 |
with gr.Column(scale=1):
|
1223 |
+
with gr.Group(elem_id="form-group"):
|
1224 |
title = gr.Textbox(label="Lecture Title", placeholder="e.g. Introduction to AI")
|
1225 |
lecture_content_description = gr.Textbox(label="Lecture Content Description", placeholder="e.g. Focus on recent advancements")
|
1226 |
lecture_type = gr.Dropdown(["Conference", "University", "High school"], label="Audience", value="University")
|
|
|
1229 |
"OpenAI-gpt-4o-2024-08-06",
|
1230 |
"Anthropic-claude-3-sonnet-20240229",
|
1231 |
"Google-gemini-1.5-flash",
|
1232 |
+
"Ollama-llama3.2",
|
1233 |
+
"Azure AI Foundry"
|
1234 |
],
|
1235 |
label="Model",
|
1236 |
value="Google-gemini-1.5-flash"
|
1237 |
)
|
1238 |
+
api_key = gr.Textbox(label="Model Provider API Key", type="password", placeholder="Not required for Ollama or Azure AI Foundry (use GITHUB_TOKEN env var)")
|
1239 |
+
serpapi_key = gr.Textbox(label="SerpApi Key (For Research Agent)", type="password", placeholder="Enter your SerpApi key (optional)")
|
1240 |
+
num_slides = gr.Slider(1, 20, step=1, label="Number of Content Slides", value=3)
|
1241 |
+
speaker_audio = gr.Audio(label="Speaker sample speech (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
|
1242 |
generate_btn = gr.Button("Generate Lecture")
|
1243 |
with gr.Column(scale=2):
|
1244 |
default_slide_html = """
|
1245 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 30px; box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important;">
|
1246 |
+
<h2 style="font-style: italic; color: #000;">Waiting for lecture content...</h2>
|
1247 |
<p style="margin-top: 10px; font-size: 16px;">Please Generate lecture content via the form on the left first before lecture begins</p>
|
1248 |
</div>
|
1249 |
"""
|
1250 |
+
slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html, elem_id="slide-display")
|
1251 |
+
file_output = gr.File(label="Download Lecture Materials", elem_id="download")
|
|
|
1252 |
|
1253 |
speaker_audio.change(
|
1254 |
fn=update_audio_preview,
|
|
|
1259 |
generate_btn.click(
|
1260 |
fn=on_generate,
|
1261 |
inputs=[api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides],
|
1262 |
+
outputs=[slide_display, file_output]
|
1263 |
)
|
1264 |
|
1265 |
if __name__ == "__main__":
|