Jaward committed on
Commit
1ff7e02
·
verified ·
1 Parent(s): a646d0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +573 -452
app.py CHANGED
@@ -16,11 +16,14 @@ from autogen_agentchat.messages import TextMessage, HandoffMessage, StructuredMe
16
  from autogen_ext.models.anthropic import AnthropicChatCompletionClient
17
  from autogen_ext.models.openai import OpenAIChatCompletionClient
18
  from autogen_ext.models.ollama import OllamaChatCompletionClient
 
 
19
  import traceback
20
  import soundfile as sf
21
  import tempfile
22
  from pydub import AudioSegment
23
  from TTS.api import TTS
 
24
 
25
  # Set up logging
26
  logging.basicConfig(
@@ -34,18 +37,17 @@ logging.basicConfig(
34
  logger = logging.getLogger(__name__)
35
 
36
  # Set up environment
37
- OUTPUT_DIR = os.path.join(os.getcwd(), "outputs") # Fallback for local dev
38
  os.makedirs(OUTPUT_DIR, exist_ok=True)
39
  logger.info(f"Using output directory: {OUTPUT_DIR}")
40
  os.environ["COQUI_TOS_AGREED"] = "1"
41
 
42
- # Initialize TTS model at the top
43
-
44
  device = "cuda" if torch.cuda.is_available() else "cpu"
45
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
46
  logger.info("TTS model initialized on %s", device)
47
 
48
- # Define Pydantic model for slide data
49
  class Slide(BaseModel):
50
  title: str
51
  content: str
@@ -53,7 +55,7 @@ class Slide(BaseModel):
53
  class SlidesOutput(BaseModel):
54
  slides: list[Slide]
55
 
56
- # Define search_web tool using SerpApi
57
  def search_web(query: str, serpapi_key: str) -> str:
58
  try:
59
  params = {
@@ -88,18 +90,68 @@ def search_web(query: str, serpapi_key: str) -> str:
88
  logger.error("Unexpected error during search: %s", str(e))
89
  return None
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  # Define helper function for progress HTML
92
  def html_with_progress(label, progress):
93
  return f"""
94
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
95
- <div style="width: 70%; background-color: #FFFFFF; border-radius: 80px; overflow: hidden; margin-bottom: 20px;">
96
  <div style="width: {progress}%; height: 15px; background-color: #4CAF50; border-radius: 80px;"></div>
97
  </div>
98
  <h2 style="font-style: italic; color: #555;">{label}</h2>
99
  </div>
100
  """
101
 
102
- # Function to get model client based on selected service
103
  def get_model_client(service, api_key):
104
  if service == "OpenAI-gpt-4o-2024-08-06":
105
  return OpenAIChatCompletionClient(model="gpt-4o-2024-08-06", api_key=api_key)
@@ -109,6 +161,19 @@ def get_model_client(service, api_key):
109
  return OpenAIChatCompletionClient(model="gemini-1.5-flash", api_key=api_key)
110
  elif service == "Ollama-llama3.2":
111
  return OllamaChatCompletionClient(model="llama3.2")
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  else:
113
  raise ValueError("Invalid service")
114
 
@@ -292,61 +357,38 @@ def extract_json_from_message(message):
292
  logger.warning("Unsupported message type for JSON extraction: %s", type(message))
293
  return None
294
 
295
- # Function to generate Markdown slides
296
- def generate_markdown_slides(slides, title, speaker="Prof. AI Feynman", date="April 26th, 2025"):
297
- try:
298
- markdown_slides = []
299
- for i, slide in enumerate(slides):
300
- slide_number = i + 1
301
- content = slide['content']
302
-
303
- # First slide has no header/footer, others have header and footer
304
- if i == 0:
305
- slide_md = f"""
306
- # {slide['title']}
307
- {content}
308
-
309
- **{speaker}**
310
- *{date}*
311
- """
312
- else:
313
- slide_md = f"""
314
- ##### Slide {slide_number}, {slide['title']}
315
- {content}
316
-
317
- , {title} {speaker}, {date}
318
- """
319
- markdown_slides.append(slide_md.strip())
320
-
321
- logger.info(f"Generated Markdown slides for: {title}: {markdown_slides}")
322
- return markdown_slides
323
- except Exception as e:
324
- logger.error(f"Failed to generate Markdown slides: {str(e)}")
325
- logger.error(traceback.format_exc())
326
- return None
327
-
328
- # Async function to update audio preview
329
  async def update_audio_preview(audio_file):
330
  if audio_file:
331
  logger.info("Updating audio preview for file: %s", audio_file)
332
  return audio_file
333
  return None
334
 
335
- # Function to create a zip file of all .txt files
336
- def create_zip_of_txt_files(txt_file_paths):
337
  zip_path = os.path.join(OUTPUT_DIR, "lecture_files.zip")
338
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
339
- for file_path in txt_file_paths:
340
  if os.path.exists(file_path):
341
- zipf.write(file_path, os.path.basename(file_path))
 
 
 
342
  logger.info("Created zip file: %s", zip_path)
343
  return zip_path
344
 
345
- # Async function to generate lecture materials and audio
 
 
 
 
 
346
  async def on_generate(api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides):
347
  model_client = get_model_client(api_service, api_key)
348
 
349
- total_slides = num_slides # Use exactly the number of slides from input
 
 
350
 
351
  research_agent = AssistantAgent(
352
  name="research_agent",
@@ -360,14 +402,23 @@ async def on_generate(api_service, api_key, serpapi_key, title, lecture_content_
360
  model_client=model_client,
361
  handoffs=["script_agent"],
362
  system_message=f"""
363
- You are a Slide Agent. Using the research from the conversation history and the specified number of slides ({total_slides}), generate exactly {total_slides} content slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. Do not include any explanatory text, comments, or other messages. Ensure the JSON is valid and contains exactly {total_slides} slides before proceeding. After outputting the JSON, use the handoff_to_script_agent tool to pass the task to the Script Agent.
364
- Example output for 2 slides:
 
 
 
 
 
 
 
365
  ```json
366
  [
367
- {{"title": "Slide 1", "content": "Content for slide 1"}},
368
- {{"title": "Slide 2", "content": "Content for slide 2"}}
 
369
  ]
370
  ```""",
 
371
  output_content_type=None,
372
  reflect_on_tool_use=False
373
  )
@@ -376,13 +427,18 @@ Example output for 2 slides:
376
  model_client=model_client,
377
  handoffs=["feynman_agent"],
378
  system_message=f"""
379
- You are a Script Agent. Access the JSON array of {total_slides} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a clear, academically inclined tone as a professor would deliver it. Avoid using non-verbal fillers such as "um," "you know," or "like." Output ONLY a JSON array wrapped in ```json ... ``` with exactly {total_slides} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
380
- Example for 3 slides:
 
 
 
 
 
381
  ```json
382
  [
383
- "Hello everyone, welcome to Agents 101. I am Jaward, your primary instructor for this course.",
384
- "Today, we will cover the syllabus for this semester, providing a gentle introduction to AI agents.",
385
- "Let us define what an AI agent is: it refers to a system or program capable of autonomously performing tasks on behalf of a user or another system."
386
  ]
387
  ```""",
388
  output_content_type=None,
@@ -393,8 +449,8 @@ Example for 3 slides:
393
  model_client=model_client,
394
  handoffs=[],
395
  system_message=f"""
396
- You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {total_slides} slides and {total_slides} scripts are received. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing, invalid, or do not match the expected count ({total_slides}), report the issue clearly. Use 'TERMINATE' to signal completion.
397
- Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is coherent. TERMINATE'
398
  """)
399
 
400
  swarm = Swarm(
@@ -406,7 +462,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
406
  label = "Research: in progress..."
407
  yield (
408
  html_with_progress(label, progress),
409
- [], None
410
  )
411
  await asyncio.sleep(0.1)
412
 
@@ -414,13 +470,14 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
414
  Lecture Title: {title}
415
  Lecture Content Description: {lecture_content_description}
416
  Audience: {lecture_type}
417
- Number of Slides: {total_slides}
418
  Please start by researching the topic, or proceed without research if search is unavailable.
419
  """
420
- logger.info("Starting lecture generation for title: %s with %d slides", title, total_slides)
421
 
422
  slides = None
423
  scripts = None
 
424
  error_html = """
425
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
426
  <h2 style="color: #d9534f;">Failed to generate lecture materials</h2>
@@ -452,7 +509,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
452
  label = "Slides: generating..."
453
  yield (
454
  html_with_progress(label, progress),
455
- [], None
456
  )
457
  await asyncio.sleep(0.1)
458
  elif source == "slide_agent" and message.target == "script_agent":
@@ -467,7 +524,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
467
  slide_retry_count += 1
468
  logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
469
  retry_message = TextMessage(
470
- content=f"Please generate exactly {total_slides} slides as per your instructions.",
471
  source="user",
472
  recipient="slide_agent"
473
  )
@@ -477,7 +534,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
477
  label = "Scripts: generating..."
478
  yield (
479
  html_with_progress(label, progress),
480
- [], None
481
  )
482
  await asyncio.sleep(0.1)
483
  elif source == "script_agent" and message.target == "feynman_agent":
@@ -491,7 +548,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
491
  label = "Review: in progress..."
492
  yield (
493
  html_with_progress(label, progress),
494
- [], None
495
  )
496
  await asyncio.sleep(0.1)
497
 
@@ -501,7 +558,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
501
  label = "Slides: generating..."
502
  yield (
503
  html_with_progress(label, progress),
504
- [], None
505
  )
506
  await asyncio.sleep(0.1)
507
 
@@ -516,25 +573,21 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
516
  slide_retry_count += 1
517
  logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
518
  retry_message = TextMessage(
519
- content=f"Please generate exactly {total_slides} slides as per your instructions.",
520
  source="user",
521
  recipient="slide_agent"
522
  )
523
  task_result.messages.append(retry_message)
524
  continue
525
- for i, slide in enumerate(slides):
526
- content_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_content.txt")
527
- try:
528
- with open(content_file, "w", encoding="utf-8") as f:
529
- f.write(slide["content"])
530
- logger.info("Saved slide content to %s", content_file)
531
- except Exception as e:
532
- logger.error("Error saving slide content to %s: %s", content_file, str(e))
533
  progress = 50
534
  label = "Scripts: generating..."
535
  yield (
536
  html_with_progress(label, progress),
537
- [], None
538
  )
539
  await asyncio.sleep(0.1)
540
  else:
@@ -543,7 +596,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
543
  slide_retry_count += 1
544
  logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
545
  retry_message = TextMessage(
546
- content=f"Please generate exactly {total_slides} slides as per your instructions.",
547
  source="user",
548
  recipient="slide_agent"
549
  )
@@ -568,7 +621,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
568
  label = "Scripts generated and saved. Reviewing..."
569
  yield (
570
  html_with_progress(label, progress),
571
- [], None
572
  )
573
  await asyncio.sleep(0.1)
574
  else:
@@ -588,20 +641,18 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
588
  logger.info("Feynman Agent completed lecture review: %s", message.content)
589
  progress = 90
590
  label = "Lecture materials ready. Generating audio..."
591
- # Collect .txt files for download
592
- txt_files = [f for f in os.listdir(OUTPUT_DIR) if f.endswith('.txt')]
593
- txt_files.sort() # Sort for consistent display
594
- txt_file_paths = [os.path.join(OUTPUT_DIR, f) for f in txt_files]
595
- zip_file = create_zip_of_txt_files(txt_file_paths)
596
  yield (
597
  html_with_progress(label, progress),
598
- txt_file_paths,
599
- zip_file
600
  )
601
  await asyncio.sleep(0.1)
602
 
603
  logger.info("Slides state: %s", "Generated" if slides else "None")
604
  logger.info("Scripts state: %s", "Generated" if scripts else "None")
 
605
  if not slides or not scripts:
606
  error_message = f"Failed to generate {'slides and scripts' if not slides and not scripts else 'slides' if not slides else 'scripts'}"
607
  error_message += f". Received {len(slides) if slides else 0} slides and {len(scripts) if scripts else 0} scripts."
@@ -612,7 +663,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
612
  logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
613
  yield (
614
  error_html,
615
- [], None
616
  )
617
  return
618
 
@@ -625,7 +676,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
625
  <p style="margin-top: 20px;">Expected {total_slides} slides, but generated {len(slides)}. Please try again.</p>
626
  </div>
627
  """,
628
- [], None
629
  )
630
  return
631
 
@@ -638,7 +689,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
638
  <p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
639
  </div>
640
  """,
641
- [], None
642
  )
643
  return
644
 
@@ -651,192 +702,21 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
651
  <p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
652
  </div>
653
  """,
654
- [], None
655
  )
656
  return
657
 
658
- markdown_slides = generate_markdown_slides(slides, title)
659
- if not markdown_slides:
660
- logger.error("Failed to generate Markdown slides")
661
- yield (
662
- f"""
663
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
664
- <h2 style="color: #d9534f;">Failed to generate slides</h2>
665
- <p style="margin-top: 20px;">Please try again.</p>
666
- </div>
667
- """,
668
- [], None
669
- )
670
- return
671
-
672
- # Generate initial audio timeline with placeholders
673
  audio_urls = [None] * len(scripts)
674
  audio_timeline = ""
675
  for i in range(len(scripts)):
676
  audio_timeline += f'<audio id="audio-{i+1}" controls src="" style="display: inline-block; margin: 0 10px; width: 200px;"><span>Loading...</span></audio>'
677
 
678
- # Collect .txt files for download (already done above, but ensure it's available)
679
- txt_files = [f for f in os.listdir(OUTPUT_DIR) if f.endswith('.txt')]
680
- txt_files.sort() # Sort for consistent display
681
- txt_file_paths = [os.path.join(OUTPUT_DIR, f) for f in txt_files]
682
- zip_file = create_zip_of_txt_files(txt_file_paths)
683
-
684
- # Yield the lecture materials immediately after slides and scripts are ready
685
- slides_info = json.dumps({"slides": markdown_slides, "audioFiles": audio_urls})
686
- html_output = f"""
687
- <div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
688
- <div id="slide-content" style="flex: 1; overflow: auto; padding: 20px; text-align: center; background-color: #fff; color: #333;"></div>
689
- <div style="padding: 20px; text-align: center;">
690
- <div style="display: flex; justify-content: center; margin-bottom: 10px;">
691
- {audio_timeline}
692
- </div>
693
- <div style="display: flex; justify-content: center; margin-bottom: 10px;">
694
- <button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
695
- <button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
696
- <button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
697
- <button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">☐</button>
698
- </div>
699
- </div>
700
- </div>
701
- <script>
702
- const lectureData = {slides_info};
703
- let currentSlide = 0;
704
- const totalSlides = lectureData.slides.length;
705
- let audioElements = [];
706
- let isPlaying = false;
707
-
708
- // Populate audio elements
709
- for (let i = 0; i < totalSlides; i++) {{
710
- const audio = document.getElementById(`audio-${{i+1}}`);
711
- audioElements.push(audio);
712
- }}
713
-
714
- function renderSlide() {{
715
- const slideContent = document.getElementById('slide-content');
716
- if (lectureData.slides[currentSlide]) {{
717
- // Since the content is already Markdown-rendered by Gradio, we can set it directly
718
- slideContent.innerHTML = lectureData.slides[currentSlide].replace(/\\n/g, '<br>');
719
- console.log("Rendering slide:", lectureData.slides[currentSlide]);
720
- }} else {{
721
- slideContent.innerHTML = '<h2>No slide content available</h2>';
722
- console.log("No slide content for index:", currentSlide);
723
- }}
724
- }}
725
-
726
- function updateSlide() {{
727
- renderSlide();
728
- audioElements.forEach(audio => {{
729
- if (audio && audio.pause) {{
730
- audio.pause();
731
- audio.currentTime = 0;
732
- }}
733
- }});
734
- }}
735
-
736
- function updateAudioSources(audioUrls) {{
737
- audioUrls.forEach((url, index) => {{
738
- const audio = audioElements[index];
739
- if (audio && url && audio.src !== url) {{
740
- audio.src = url;
741
- audio.load(); // Force reload the audio element
742
- console.log(`Updated audio-${{index+1}} src to:`, url);
743
- }}
744
- }});
745
- }}
746
-
747
- function prevSlide() {{
748
- if (currentSlide > 0) {{
749
- currentSlide--;
750
- updateSlide();
751
- const audio = audioElements[currentSlide];
752
- if (audio && audio.play && isPlaying) {{
753
- audio.play().catch(e => console.error('Audio play failed:', e));
754
- }}
755
- }}
756
- }}
757
-
758
- function nextSlide() {{
759
- if (currentSlide < totalSlides - 1) {{
760
- currentSlide++;
761
- updateSlide();
762
- const audio = audioElements[currentSlide];
763
- if (audio && audio.play && isPlaying) {{
764
- audio.play().catch(e => console.error('Audio play failed:', e));
765
- }}
766
- }}
767
- }}
768
-
769
- function playAll() {{
770
- isPlaying = !isPlaying;
771
- const playBtn = document.getElementById('play-btn');
772
- playBtn.textContent = isPlaying ? '⏸' : '⏯';
773
- if (!isPlaying) {{
774
- audioElements.forEach(audio => {{
775
- if (audio && audio.pause) {{
776
- audio.pause();
777
- audio.currentTime = 0;
778
- }}
779
- }});
780
- return;
781
- }}
782
- let index = currentSlide;
783
- function playNext() {{
784
- if (index >= totalSlides || !isPlaying) {{
785
- isPlaying = false;
786
- playBtn.textContent = '⏯';
787
- return;
788
- }}
789
- currentSlide = index;
790
- updateSlide();
791
- const audio = audioElements[index];
792
- if (audio && audio.play) {{
793
- audio.play().then(() => {{
794
- audio.addEventListener('ended', () => {{
795
- index++;
796
- playNext();
797
- }}, {{ once: true }});
798
- }}).catch(e => {{
799
- console.error('Audio play failed:', e);
800
- index++;
801
- playNext();
802
- }});
803
- }} else {{
804
- index++;
805
- playNext();
806
- }}
807
- }}
808
- playNext();
809
- }}
810
-
811
- function toggleFullScreen() {{
812
- const container = document.getElementById('lecture-container');
813
- if (!document.fullscreenElement) {{
814
- container.requestFullscreen().catch(err => {{
815
- console.error('Error attempting to enable full-screen mode:', err);
816
- }});
817
- }} else {{
818
- document.exitFullscreen();
819
- }}
820
- }}
821
-
822
- // Attach event listeners
823
- document.getElementById('prev-btn').addEventListener('click', prevSlide);
824
- document.getElementById('play-btn').addEventListener('click', playAll);
825
- document.getElementById('next-btn').addEventListener('click', nextSlide);
826
- document.getElementById('fullscreen-btn').addEventListener('click', toggleFullScreen);
827
-
828
- // Initialize first slide
829
- renderSlide();
830
- </script>
831
- """
832
- logger.info("Yielding lecture materials before audio generation")
833
- yield (
834
- html_output,
835
- txt_file_paths,
836
- zip_file
837
- )
838
 
839
- # Now generate audio files progressively
840
  audio_files = []
841
  validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
842
  if not validated_speaker_wav:
@@ -848,7 +728,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
848
  <p style="margin-top: 20px;">Please upload a valid MP3 or WAV audio file and try again.</p>
849
  </div>
850
  """,
851
- [], None
852
  )
853
  return
854
 
@@ -869,11 +749,10 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
869
  audio_files.append(None)
870
  audio_urls[i] = None
871
  progress = 90 + ((i + 1) / len(scripts)) * 10
872
- label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
873
  yield (
874
- html_output,
875
- txt_file_paths,
876
- zip_file
877
  )
878
  await asyncio.sleep(0.1)
879
  continue
@@ -894,171 +773,13 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
894
 
895
  logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
896
  audio_files.append(audio_file)
897
- audio_urls[i] = f"/gradio_api/file={audio_file}"
898
  progress = 90 + ((i + 1) / len(scripts)) * 10
899
- label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
900
-
901
- # Update audio timeline with the new audio URL
902
- audio_timeline = ""
903
- for j, url in enumerate(audio_urls):
904
- if url:
905
- audio_timeline += f'<audio id="audio-{j+1}" controls src="{url}" style="display: inline-block; margin: 0 10px; width: 200px;"></audio>'
906
- else:
907
- audio_timeline += f'<audio id="audio-{j+1}" controls src="" style="display: inline-block; margin: 0 10px; width: 200px;"><span>Loading...</span></audio>'
908
-
909
- html_output = f"""
910
- <div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
911
- <div id="slide-content" style="flex: 1; overflow: auto; padding: 20px; text-align: center; background-color: #fff; color: #333;"></div>
912
- <div style="padding: 20px; text-align: center;">
913
- <div style="display: flex; justify-content: center; margin-bottom: 10px;">
914
- {audio_timeline}
915
- </div>
916
- <div style="display: flex; justify-content: center; margin-bottom: 10px;">
917
- <button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
918
- <button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
919
- <button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
920
- <button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">☐</button>
921
- </div>
922
- </div>
923
- </div>
924
- <script>
925
- const lectureData = {slides_info};
926
- let currentSlide = 0;
927
- const totalSlides = lectureData.slides.length;
928
- let audioElements = [];
929
- let isPlaying = false;
930
-
931
- // Populate audio elements
932
- for (let i = 0; i < totalSlides; i++) {{
933
- const audio = document.getElementById(`audio-${{i+1}}`);
934
- audioElements.push(audio);
935
- }}
936
-
937
- // Update audio sources dynamically
938
- lectureData.audioFiles = {json.dumps(audio_urls)};
939
- updateAudioSources(lectureData.audioFiles);
940
-
941
- function renderSlide() {{
942
- const slideContent = document.getElementById('slide-content');
943
- if (lectureData.slides[currentSlide]) {{
944
- slideContent.innerHTML = lectureData.slides[currentSlide].replace(/\\n/g, '<br>');
945
- console.log("Rendering slide:", lectureData.slides[currentSlide]);
946
- }} else {{
947
- slideContent.innerHTML = '<h2>No slide content available</h2>';
948
- console.log("No slide content for index:", currentSlide);
949
- }}
950
- }}
951
-
952
- function updateSlide() {{
953
- renderSlide();
954
- audioElements.forEach(audio => {{
955
- if (audio && audio.pause) {{
956
- audio.pause();
957
- audio.currentTime = 0;
958
- }}
959
- }});
960
- }}
961
-
962
- function updateAudioSources(audioUrls) {{
963
- audioUrls.forEach((url, index) => {{
964
- const audio = audioElements[index];
965
- if (audio && url && audio.src !== url) {{
966
- audio.src = url;
967
- audio.load();
968
- console.log(`Updated audio-${{index+1}} src to:`, url);
969
- }}
970
- }});
971
- }}
972
-
973
- function prevSlide() {{
974
- if (currentSlide > 0) {{
975
- currentSlide--;
976
- updateSlide();
977
- const audio = audioElements[currentSlide];
978
- if (audio && audio.play && isPlaying) {{
979
- audio.play().catch(e => console.error('Audio play failed:', e));
980
- }}
981
- }}
982
- }}
983
-
984
- function nextSlide() {{
985
- if (currentSlide < totalSlides - 1) {{
986
- currentSlide++;
987
- updateSlide();
988
- const audio = audioElements[currentSlide];
989
- if (audio && audio.play && isPlaying) {{
990
- audio.play().catch(e => console.error('Audio play failed:', e));
991
- }}
992
- }}
993
- }}
994
-
995
- function playAll() {{
996
- isPlaying = !isPlaying;
997
- const playBtn = document.getElementById('play-btn');
998
- playBtn.textContent = isPlaying ? '⏸' : '⏯';
999
- if (!isPlaying) {{
1000
- audioElements.forEach(audio => {{
1001
- if (audio && audio.pause) {{
1002
- audio.pause();
1003
- audio.currentTime = 0;
1004
- }}
1005
- }});
1006
- return;
1007
- }}
1008
- let index = currentSlide;
1009
- function playNext() {{
1010
- if (index >= totalSlides || !isPlaying) {{
1011
- isPlaying = false;
1012
- playBtn.textContent = '⏯';
1013
- return;
1014
- }}
1015
- currentSlide = index;
1016
- updateSlide();
1017
- const audio = audioElements[index];
1018
- if (audio && audio.play) {{
1019
- audio.play().then(() => {{
1020
- audio.addEventListener('ended', () => {{
1021
- index++;
1022
- playNext();
1023
- }}, {{ once: true }});
1024
- }}).catch(e => {{
1025
- console.error('Audio play failed:', e);
1026
- index++;
1027
- playNext();
1028
- }});
1029
- }} else {{
1030
- index++;
1031
- playNext();
1032
- }}
1033
- }}
1034
- playNext();
1035
- }}
1036
-
1037
- function toggleFullScreen() {{
1038
- const container = document.getElementById('lecture-container');
1039
- if (!document.fullscreenElement) {{
1040
- container.requestFullscreen().catch(err => {{
1041
- console.error('Error attempting to enable full-screen mode:', err);
1042
- }});
1043
- }} else {{
1044
- document.exitFullscreen();
1045
- }}
1046
- }}
1047
-
1048
- // Attach event listeners
1049
- document.getElementById('prev-btn').addEventListener('click', prevSlide);
1050
- document.getElementById('play-btn').addEventListener('click', playAll);
1051
- document.getElementById('next-btn').addEventListener('click', nextSlide);
1052
- document.getElementById('fullscreen-btn').addEventListener('click', toggleFullScreen);
1053
-
1054
- // Initialize first slide
1055
- renderSlide();
1056
- </script>
1057
- """
1058
  yield (
1059
- html_output,
1060
- txt_file_paths,
1061
- zip_file
1062
  )
1063
  await asyncio.sleep(0.1)
1064
  break
@@ -1069,15 +790,52 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
1069
  audio_files.append(None)
1070
  audio_urls[i] = None
1071
  progress = 90 + ((i + 1) / len(scripts)) * 10
1072
- label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
1073
  yield (
1074
- html_output,
1075
- txt_file_paths,
1076
- zip_file
1077
  )
1078
  await asyncio.sleep(0.1)
1079
  break
1080
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1081
  logger.info("Lecture generation completed successfully")
1082
 
1083
  except Exception as e:
@@ -1090,16 +848,379 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
1090
  <p style="margin-top: 20px;">Please try again or adjust your inputs.</p>
1091
  </div>
1092
  """,
1093
- [], None
1094
  )
1095
  return
1096
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1097
  # Gradio interface
1098
- with gr.Blocks(title="Agent Feynman") as demo:
1099
- gr.Markdown("# <center>Learn Anything With Professor AI Feynman</center>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1100
  with gr.Row():
1101
  with gr.Column(scale=1):
1102
- with gr.Group():
1103
  title = gr.Textbox(label="Lecture Title", placeholder="e.g. Introduction to AI")
1104
  lecture_content_description = gr.Textbox(label="Lecture Content Description", placeholder="e.g. Focus on recent advancements")
1105
  lecture_type = gr.Dropdown(["Conference", "University", "High school"], label="Audience", value="University")
@@ -1108,26 +1229,26 @@ with gr.Blocks(title="Agent Feynman") as demo:
1108
  "OpenAI-gpt-4o-2024-08-06",
1109
  "Anthropic-claude-3-sonnet-20240229",
1110
  "Google-gemini-1.5-flash",
1111
- "Ollama-llama3.2"
 
1112
  ],
1113
  label="Model",
1114
  value="Google-gemini-1.5-flash"
1115
  )
1116
- api_key = gr.Textbox(label="Model Provider API Key", type="password", placeholder="Not required for Ollama")
1117
- serpapi_key = gr.Textbox(label="SerpApi Key", type="password", placeholder="Enter your SerpApi key (optional)")
1118
- num_slides = gr.Slider(1, 20, step=1, label="Number of Slides", value=3)
1119
- speaker_audio = gr.Audio(label="Speaker sample audio (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
1120
  generate_btn = gr.Button("Generate Lecture")
1121
  with gr.Column(scale=2):
1122
  default_slide_html = """
1123
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
1124
- <h2 style="font-style: italic; color: #555;">Waiting for lecture content...</h2>
1125
  <p style="margin-top: 10px; font-size: 16px;">Please Generate lecture content via the form on the left first before lecture begins</p>
1126
  </div>
1127
  """
1128
- slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html)
1129
- file_output = gr.File(label="Download Generated Files")
1130
- zip_output = gr.File(label="Download All Files as ZIP")
1131
 
1132
  speaker_audio.change(
1133
  fn=update_audio_preview,
@@ -1138,7 +1259,7 @@ with gr.Blocks(title="Agent Feynman") as demo:
1138
  generate_btn.click(
1139
  fn=on_generate,
1140
  inputs=[api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides],
1141
- outputs=[slide_display, file_output, zip_output]
1142
  )
1143
 
1144
  if __name__ == "__main__":
 
16
  from autogen_ext.models.anthropic import AnthropicChatCompletionClient
17
  from autogen_ext.models.openai import OpenAIChatCompletionClient
18
  from autogen_ext.models.ollama import OllamaChatCompletionClient
19
+ from autogen_ext.models.azure import AzureAIChatCompletionClient
20
+ from azure.core.credentials import AzureKeyCredential
21
  import traceback
22
  import soundfile as sf
23
  import tempfile
24
  from pydub import AudioSegment
25
  from TTS.api import TTS
26
+ import markdown
27
 
28
  # Set up logging
29
  logging.basicConfig(
 
37
  logger = logging.getLogger(__name__)
38
 
39
  # Set up environment
40
+ OUTPUT_DIR = os.path.join(os.getcwd(), "outputs")
41
  os.makedirs(OUTPUT_DIR, exist_ok=True)
42
  logger.info(f"Using output directory: {OUTPUT_DIR}")
43
  os.environ["COQUI_TOS_AGREED"] = "1"
44
 
45
+ # Initialize TTS model
 
46
  device = "cuda" if torch.cuda.is_available() else "cpu"
47
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
48
  logger.info("TTS model initialized on %s", device)
49
 
50
+ # Define model for slide data
51
  class Slide(BaseModel):
52
  title: str
53
  content: str
 
55
  class SlidesOutput(BaseModel):
56
  slides: list[Slide]
57
 
58
+ # Search web tool using SerpApi
59
  def search_web(query: str, serpapi_key: str) -> str:
60
  try:
61
  params = {
 
90
  logger.error("Unexpected error during search: %s", str(e))
91
  return None
92
 
93
+ # Custom function to render Markdown to HTML
94
def render_md_to_html(md_content: str) -> str:
    """Convert Markdown text into an HTML fragment.

    Args:
        md_content: Markdown source to render.

    Returns:
        The rendered HTML, or a fixed error ``<div>`` if rendering raised
        (the failure is logged, not propagated).
    """
    enabled_extensions = ['extra', 'fenced_code', 'tables']
    try:
        return markdown.markdown(md_content, extensions=enabled_extensions)
    except Exception as err:
        # Best-effort: callers always get a string they can embed in a slide.
        logger.error("Failed to render Markdown to HTML: %s", str(err))
        return "<div>Error rendering content</div>"
101
+
102
+ # Define create_slides tool for generating HTML slides
103
def create_slides(slides: list[dict], title: str, output_dir: str = OUTPUT_DIR) -> list[str]:
    """Render slides into the HTML template and save them with their Markdown source.

    For every slide, ``slide_template.html`` (read from the current working
    directory) is filled in and written to ``slide_<n>.html`` in ``output_dir``;
    the raw Markdown is written next to it as ``slide_<n>_content.md``.

    Args:
        slides: Slide dicts, each providing ``'title'`` and ``'content'``
            (Markdown) keys.
        title: Lecture title inserted into every slide.
        output_dir: Directory that receives the generated files; created if
            it does not exist.

    Returns:
        Paths of the generated HTML files in slide order, or an empty list if
        anything failed (the error is logged, not raised).
    """
    import re  # local import: only this function needs it

    # Literal placeholder texts expected in the template.
    placeholders = (
        "<!--SLIDE_NUMBER-->",
        "section title",
        "Lecture title",
        "<!--CONTENT-->",
        "speaker name",
        "date",
    )
    placeholder_pattern = re.compile("|".join(re.escape(p) for p in placeholders))

    try:
        template_file = os.path.join(os.getcwd(), "slide_template.html")
        with open(template_file, "r", encoding="utf-8") as f:
            template_content = f.read()

        os.makedirs(output_dir, exist_ok=True)

        html_files = []
        for slide_number, slide in enumerate(slides, start=1):
            md_content = slide['content']
            substitutions = {
                "<!--SLIDE_NUMBER-->": str(slide_number),
                "section title": f"Slide {slide_number}, {slide['title']}",
                "Lecture title": title,
                "<!--CONTENT-->": render_md_to_html(md_content),
                "speaker name": "Prof. AI Feynman",
                "date": "May 2nd, 2025",
            }
            # Single pass over the template: inserted values are never
            # re-scanned, so slide text containing e.g. "date" ("update",
            # "validate") is no longer mangled by a later replacement.
            slide_html = placeholder_pattern.sub(
                lambda match: substitutions[match.group(0)],
                template_content,
            )

            html_file = os.path.join(output_dir, f"slide_{slide_number}.html")
            with open(html_file, "w", encoding="utf-8") as f:
                f.write(slide_html)
            logger.info("Generated HTML slide: %s", html_file)
            html_files.append(html_file)

            # Save slide content as a Markdown file alongside the HTML.
            md_file = os.path.join(output_dir, f"slide_{slide_number}_content.md")
            with open(md_file, "w", encoding="utf-8") as f:
                f.write(md_content)
            logger.info("Saved slide content to Markdown: %s", md_file)

        return html_files

    except Exception as e:
        # Callers treat an empty list as "slide generation failed".
        logger.error("Failed to create HTML slides: %s", str(e))
        return []
142
+
143
  # Define helper function for progress HTML
144
  def html_with_progress(label, progress):
145
  return f"""
146
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
147
+ <div style="width: 70%; background-color: lightgrey; border-radius: 80px; overflow: hidden; margin-bottom: 20px;">
148
  <div style="width: {progress}%; height: 15px; background-color: #4CAF50; border-radius: 80px;"></div>
149
  </div>
150
  <h2 style="font-style: italic; color: #555;">{label}</h2>
151
  </div>
152
  """
153
 
154
+ # Get model client based on selected service
155
  def get_model_client(service, api_key):
156
  if service == "OpenAI-gpt-4o-2024-08-06":
157
  return OpenAIChatCompletionClient(model="gpt-4o-2024-08-06", api_key=api_key)
 
161
  return OpenAIChatCompletionClient(model="gemini-1.5-flash", api_key=api_key)
162
  elif service == "Ollama-llama3.2":
163
  return OllamaChatCompletionClient(model="llama3.2")
164
+ elif service == "Azure AI Foundry":
165
+ return AzureAIChatCompletionClient(
166
+ model="phi-4",
167
+ endpoint="https://models.inference.ai.azure.com",
168
+ credential=AzureKeyCredential(os.environ.get("GITHUB_TOKEN", "")),
169
+ model_info={
170
+ "json_output": False,
171
+ "function_calling": False,
172
+ "vision": False,
173
+ "family": "unknown",
174
+ "structured_output": False,
175
+ }
176
+ )
177
  else:
178
  raise ValueError("Invalid service")
179
 
 
357
  logger.warning("Unsupported message type for JSON extraction: %s", type(message))
358
  return None
359
 
360
+ # Async update audio preview
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  async def update_audio_preview(audio_file):
362
  if audio_file:
363
  logger.info("Updating audio preview for file: %s", audio_file)
364
  return audio_file
365
  return None
366
 
367
+ # Create a zip file of .md, .txt, and .mp3 files
368
def create_zip_of_files(file_paths):
    """Bundle the existing .md, .txt and .mp3 files into ``lecture_files.zip``.

    Args:
        file_paths: Iterable of file paths to consider. Paths that do not
            exist or have any other extension are skipped.

    Returns:
        Path of the zip archive written inside ``OUTPUT_DIR``.
    """
    archive_path = os.path.join(OUTPUT_DIR, "lecture_files.zip")
    allowed_suffixes = ('.md', '.txt', '.mp3')
    with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        for candidate in file_paths:
            if not os.path.exists(candidate):
                continue
            suffix = os.path.splitext(candidate)[1]
            if suffix not in allowed_suffixes:
                continue
            # Store entries flat, under their base name only.
            archive.write(candidate, os.path.basename(candidate))
            logger.info("Added %s to zip", candidate)
    logger.info("Created zip file: %s", archive_path)
    return archive_path
379
 
380
+ # Build the URL under which Gradio serves a local file
381
def get_gradio_file_url(local_path):
    """Return the ``/gradio_api/file=`` URL for a local file.

    Args:
        local_path: Path of the file to expose.

    Returns:
        ``/gradio_api/file=`` followed by ``local_path`` expressed relative
        to the current working directory.
    """
    working_dir = os.getcwd()
    return "/gradio_api/file=" + os.path.relpath(local_path, start=working_dir)
384
+
385
+ # Async generate lecture materials and audio
386
  async def on_generate(api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides):
387
  model_client = get_model_client(api_service, api_key)
388
 
389
+ # Total slides include user-specified content slides plus Introduction and Closing slides
390
+ content_slides = num_slides
391
+ total_slides = content_slides + 2
392
 
393
  research_agent = AssistantAgent(
394
  name="research_agent",
 
402
  model_client=model_client,
403
  handoffs=["script_agent"],
404
  system_message=f"""
405
+ You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({content_slides}), generate exactly {content_slides} content slides, plus an Introduction slide as the first slide and a Closing slide as the last slide, making a total of {total_slides} slides.
406
+
407
+ - The Introduction slide (first slide) should have the title "Introduction to {title}" and content containing only the lecture title, speaker name (Prof. AI Feynman), and date (May 2nd, 2025), centered, in plain text.
408
+ - The Closing slide (last slide) should have the title "Closing" and content containing only "The End\nThank you", centered, in plain text.
409
+ - The remaining {content_slides} slides should be content slides based on the lecture description and audience type, with meaningful titles and content in valid Markdown format.
410
+
411
+ Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. After generating the JSON, use the create_slides tool to produce HTML slides, then use the handoff_to_script_agent tool to pass the task to the Script Agent. Do not include any explanatory text or other messages.
412
+
413
+ Example output for 1 content slide (total 3 slides):
414
  ```json
415
  [
416
+ {{"title": "Introduction to AI Basics", "content": "AI Basics\nProf. AI Feynman\nMay 2nd, 2025"}},
417
+ {{"title": "Slide 1: What is AI?", "content": "# What is AI?\n- Definition: Systems that mimic human intelligence\n- Key areas: ML, NLP, Robotics"}},
418
+ {{"title": "Closing", "content": "The End\nThank you"}}
419
  ]
420
  ```""",
421
+ tools=[create_slides],
422
  output_content_type=None,
423
  reflect_on_tool_use=False
424
  )
 
427
  model_client=model_client,
428
  handoffs=["feynman_agent"],
429
  system_message=f"""
430
+ You are a Script Agent model after Richard Feynman. Access the JSON array of {total_slides} slides from the conversation history, which includes an Introduction slide, {content_slides} content slides, and a Closing slide. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a clear, academically inclined tone, with humour as a professor feynman would deliver it. Avoid using non-verbal fillers such as "um," "you know," or "like." Output ONLY a JSON array wrapped in ```json ... ``` with exactly {total_slides} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
431
+
432
+ - For the Introduction slide, the script should be a welcoming message introducing the lecture.
433
+ - For the Closing slide, the script should be a brief farewell and thank you message.
434
+ - For the content slides, summarize the slide content academically.
435
+
436
+ Example for 3 slides (1 content slide):
437
  ```json
438
  [
439
+ "Welcome to the lecture on AI Basics. I am Professor AI Feynman, and today we will explore the fundamentals of artificial intelligence.",
440
+ "Let us begin by defining artificial intelligence: it refers to systems that mimic human intelligence, spanning key areas such as machine learning, natural language processing, and robotics.",
441
+ "That concludes our lecture on AI Basics. Thank you for your attention, and I hope you found this session insightful."
442
  ]
443
  ```""",
444
  output_content_type=None,
 
449
  model_client=model_client,
450
  handoffs=[],
451
  system_message=f"""
452
+ You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {total_slides} slides and {total_slides} scripts are received, including the Introduction and Closing slides. Verify that HTML slide files exist in the outputs directory. Output a confirmation message summarizing the number of slides, scripts, and HTML files status. If slides, scripts, or HTML files are missing, invalid, or do not match the expected count ({total_slides}), report the issue clearly. Use 'TERMINATE' to signal completion.
453
+ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files. Lecture is coherent. TERMINATE'
454
  """)
455
 
456
  swarm = Swarm(
 
462
  label = "Research: in progress..."
463
  yield (
464
  html_with_progress(label, progress),
465
+ []
466
  )
467
  await asyncio.sleep(0.1)
468
 
 
470
  Lecture Title: {title}
471
  Lecture Content Description: {lecture_content_description}
472
  Audience: {lecture_type}
473
+ Number of Content Slides: {content_slides}
474
  Please start by researching the topic, or proceed without research if search is unavailable.
475
  """
476
+ logger.info("Starting lecture generation for title: %s with %d content slides (total %d slides)", title, content_slides, total_slides)
477
 
478
  slides = None
479
  scripts = None
480
+ html_files = []
481
  error_html = """
482
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
483
  <h2 style="color: #d9534f;">Failed to generate lecture materials</h2>
 
509
  label = "Slides: generating..."
510
  yield (
511
  html_with_progress(label, progress),
512
+ []
513
  )
514
  await asyncio.sleep(0.1)
515
  elif source == "slide_agent" and message.target == "script_agent":
 
524
  slide_retry_count += 1
525
  logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
526
  retry_message = TextMessage(
527
+ content=f"Please generate exactly {total_slides} slides (Introduction, {content_slides} content slides, and Closing) as per your instructions.",
528
  source="user",
529
  recipient="slide_agent"
530
  )
 
534
  label = "Scripts: generating..."
535
  yield (
536
  html_with_progress(label, progress),
537
+ []
538
  )
539
  await asyncio.sleep(0.1)
540
  elif source == "script_agent" and message.target == "feynman_agent":
 
548
  label = "Review: in progress..."
549
  yield (
550
  html_with_progress(label, progress),
551
+ []
552
  )
553
  await asyncio.sleep(0.1)
554
 
 
558
  label = "Slides: generating..."
559
  yield (
560
  html_with_progress(label, progress),
561
+ []
562
  )
563
  await asyncio.sleep(0.1)
564
 
 
573
  slide_retry_count += 1
574
  logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
575
  retry_message = TextMessage(
576
+ content=f"Please generate exactly {total_slides} slides (Introduction, {content_slides} content slides, and Closing) as per your instructions.",
577
  source="user",
578
  recipient="slide_agent"
579
  )
580
  task_result.messages.append(retry_message)
581
  continue
582
+ # Generate HTML slides
583
+ html_files = create_slides(slides, title)
584
+ if not html_files:
585
+ logger.error("Failed to generate HTML slides")
 
 
 
 
586
  progress = 50
587
  label = "Scripts: generating..."
588
  yield (
589
  html_with_progress(label, progress),
590
+ []
591
  )
592
  await asyncio.sleep(0.1)
593
  else:
 
596
  slide_retry_count += 1
597
  logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
598
  retry_message = TextMessage(
599
+ content=f"Please generate exactly {total_slides} slides (Introduction, {content_slides} content slides, and Closing) as per your instructions.",
600
  source="user",
601
  recipient="slide_agent"
602
  )
 
621
  label = "Scripts generated and saved. Reviewing..."
622
  yield (
623
  html_with_progress(label, progress),
624
+ []
625
  )
626
  await asyncio.sleep(0.1)
627
  else:
 
641
  logger.info("Feynman Agent completed lecture review: %s", message.content)
642
  progress = 90
643
  label = "Lecture materials ready. Generating audio..."
644
+ file_paths = [f for f in os.listdir(OUTPUT_DIR) if f.endswith(('.md', '.txt'))]
645
+ file_paths.sort()
646
+ file_paths = [os.path.join(OUTPUT_DIR, f) for f in file_paths]
 
 
647
  yield (
648
  html_with_progress(label, progress),
649
+ file_paths
 
650
  )
651
  await asyncio.sleep(0.1)
652
 
653
  logger.info("Slides state: %s", "Generated" if slides else "None")
654
  logger.info("Scripts state: %s", "Generated" if scripts else "None")
655
+ logger.info("HTML files state: %s", "Generated" if html_files else "None")
656
  if not slides or not scripts:
657
  error_message = f"Failed to generate {'slides and scripts' if not slides and not scripts else 'slides' if not slides else 'scripts'}"
658
  error_message += f". Received {len(slides) if slides else 0} slides and {len(scripts) if scripts else 0} scripts."
 
663
  logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
664
  yield (
665
  error_html,
666
+ []
667
  )
668
  return
669
 
 
676
  <p style="margin-top: 20px;">Expected {total_slides} slides, but generated {len(slides)}. Please try again.</p>
677
  </div>
678
  """,
679
+ []
680
  )
681
  return
682
 
 
689
  <p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
690
  </div>
691
  """,
692
+ []
693
  )
694
  return
695
 
 
702
  <p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
703
  </div>
704
  """,
705
+ []
706
  )
707
  return
708
 
709
+ # Access the generated HTML files
710
+ html_file_urls = [get_gradio_file_url(html_file) for html_file in html_files]
 
 
 
 
 
 
 
 
 
 
 
 
 
711
  audio_urls = [None] * len(scripts)
712
  audio_timeline = ""
713
  for i in range(len(scripts)):
714
  audio_timeline += f'<audio id="audio-{i+1}" controls src="" style="display: inline-block; margin: 0 10px; width: 200px;"><span>Loading...</span></audio>'
715
 
716
+ file_paths = [f for f in os.listdir(OUTPUT_DIR) if f.endswith(('.md', '.txt'))]
717
+ file_paths.sort()
718
+ file_paths = [os.path.join(OUTPUT_DIR, f) for f in file_paths]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719
 
 
720
  audio_files = []
721
  validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
722
  if not validated_speaker_wav:
 
728
  <p style="margin-top: 20px;">Please upload a valid MP3 or WAV audio file and try again.</p>
729
  </div>
730
  """,
731
+ []
732
  )
733
  return
734
 
 
749
  audio_files.append(None)
750
  audio_urls[i] = None
751
  progress = 90 + ((i + 1) / len(scripts)) * 10
752
+ label = f"Generating audio for slide {i + 1}/{len(scripts)}..."
753
  yield (
754
+ html_with_progress(label, progress),
755
+ file_paths
 
756
  )
757
  await asyncio.sleep(0.1)
758
  continue
 
773
 
774
  logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
775
  audio_files.append(audio_file)
776
+ audio_urls[i] = get_gradio_file_url(audio_file)
777
  progress = 90 + ((i + 1) / len(scripts)) * 10
778
+ label = f"Generating audio for slide {i + 1}/{len(scripts)}..."
779
+ file_paths.append(audio_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
780
  yield (
781
+ html_with_progress(label, progress),
782
+ file_paths
 
783
  )
784
  await asyncio.sleep(0.1)
785
  break
 
790
  audio_files.append(None)
791
  audio_urls[i] = None
792
  progress = 90 + ((i + 1) / len(scripts)) * 10
793
+ label = f"Generating audio for slide {i + 1}/{len(scripts)}..."
794
  yield (
795
+ html_with_progress(label, progress),
796
+ file_paths
 
797
  )
798
  await asyncio.sleep(0.1)
799
  break
800
 
801
+ # Create zip file with all materials except .html files
802
+ zip_file = create_zip_of_files(file_paths)
803
+ file_paths.append(zip_file)
804
+
805
+ # Slide hack: Render the lecture container with iframe containing HTML slides
806
+ audio_timeline = ""
807
+ for j, url in enumerate(audio_urls):
808
+ if url:
809
+ audio_timeline += f'<audio id="audio-{j+1}" controls src="{url}" style="display: inline-block; margin: 0 10px; width: 200px;"></audio>'
810
+ else:
811
+ audio_timeline += f'<audio id="audio-{j+1}" controls src="" style="display: inline-block; margin: 0 10px; width: 200px;"><span>Audio unavailable</span></audio>'
812
+
813
+ slides_info = json.dumps({"htmlFiles": html_file_urls, "audioFiles": audio_urls})
814
+ html_output = f"""
815
+ <div id="lecture-data" style="display: none;">{slides_info}</div>
816
+ <div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
817
+ <div id="slide-content" style="flex: 1; overflow: auto; padding: 20px; text-align: center; background-color: #fff;">
818
+ <iframe id="slide-iframe" style="width: 100%; height: 100%; border: none;"></iframe>
819
+ </div>
820
+ <div style="padding: 20px; text-align: center;">
821
+ <div style="display: flex; justify-content: center; margin-bottom: 10px;">
822
+ {audio_timeline}
823
+ </div>
824
+ <div style="display: center; justify-content: center; margin-bottom: 10px;">
825
+ <button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;"><i class="fas fa-step-backward"></i></button>
826
+ <button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;"><i class="fas fa-play"></i></button>
827
+ <button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;"><i class="fas fa-step-forward"></i></button>
828
+ <button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;"><i class="fas fa-expand"></i></button>
829
+ </div>
830
+ </div>
831
+ </div>
832
+ """
833
+ logger.info("Yielding final lecture materials after audio generation")
834
+ yield (
835
+ html_output,
836
+ file_paths
837
+ )
838
+
839
  logger.info("Lecture generation completed successfully")
840
 
841
  except Exception as e:
 
848
  <p style="margin-top: 20px;">Please try again or adjust your inputs.</p>
849
  </div>
850
  """,
851
+ []
852
  )
853
  return
854
 
855
+ # custom js for lecture container features
856
+ js_code = """
857
+ () => {
858
+ // Function to wait for an element to appear in the DOM
859
+ function waitForElement(selector, callback, maxAttempts = 50, interval = 100) {
860
+ let attempts = 0;
861
+ const intervalId = setInterval(() => {
862
+ const element = document.querySelector(selector);
863
+ if (element) {
864
+ clearInterval(intervalId);
865
+ console.log(`Element ${selector} found after ${attempts} attempts`);
866
+ callback(element);
867
+ } else if (attempts >= maxAttempts) {
868
+ clearInterval(intervalId);
869
+ console.error(`Element ${selector} not found after ${maxAttempts} attempts`);
870
+ }
871
+ attempts++;
872
+ }, interval);
873
+ }
874
+
875
+ // Main initialization function
876
+ function initializeSlides() {
877
+ console.log("Initializing slides...");
878
+
879
+ // Wait for lecture-data to load the JSON data
880
+ waitForElement('#lecture-data', (dataElement) => {
881
+ if (!dataElement.textContent) {
882
+ console.error("Lecture data element is empty");
883
+ return;
884
+ }
885
+ let lectureData;
886
+ try {
887
+ lectureData = JSON.parse(dataElement.textContent);
888
+ console.log("Lecture data parsed successfully:", lectureData);
889
+ } catch (e) {
890
+ console.error("Failed to parse lecture data:", e);
891
+ return;
892
+ }
893
+
894
+ if (!lectureData.htmlFiles || lectureData.htmlFiles.length === 0) {
895
+ console.error("No HTML files found in lecture data");
896
+ return;
897
+ }
898
+
899
+ let currentSlide = 0;
900
+ const totalSlides = lectureData.htmlFiles.length;
901
+ let audioElements = [];
902
+ let isPlaying = false;
903
+ let hasNavigated = false; // Track if user has used prev/next buttons
904
+
905
+ // Wait for slide-content element
906
+ waitForElement('#slide-content', (slideContent) => {
907
+ console.log("Slide content element found");
908
+
909
+ // Initialize audio elements
910
+ for (let i = 0; i < totalSlides; i++) {
911
+ const audio = document.getElementById(`audio-${i+1}`);
912
+ if (audio) {
913
+ audioElements.push(audio);
914
+ console.log(`Found audio element audio-${i+1}:`, audio);
915
+ } else {
916
+ console.error(`Audio element audio-${i+1} not found`);
917
+ }
918
+ }
919
+
920
+ function renderSlide() {
921
+ console.log("Rendering slide:", currentSlide + 1);
922
+ if (currentSlide >= 0 && currentSlide < totalSlides && lectureData.htmlFiles[currentSlide]) {
923
+ const iframe = document.getElementById('slide-iframe');
924
+ if (iframe) {
925
+ iframe.src = lectureData.htmlFiles[currentSlide];
926
+ console.log("Set iframe src to:", lectureData.htmlFiles[currentSlide]);
927
+ // Adjust font size based on content length and screen size
928
+ waitForElement('iframe', (iframe) => {
929
+ iframe.onload = () => {
930
+ const doc = iframe.contentDocument || iframe.contentWindow.document;
931
+ const body = doc.body;
932
+ if (body) {
933
+ const textLength = body.textContent.length;
934
+ const screenWidth = window.innerWidth;
935
+ // Base font size: 12px max on large screens, scale down to 8px on small screens
936
+ let baseFontSize = Math.min(12, Math.max(8, 12 * (screenWidth / 1920))); // Scale with screen width (1920px as reference)
937
+ // Adjust inversely with content length
938
+ const adjustedFontSize = Math.max(8, baseFontSize * (1000 / (textLength + 100))); // Minimum 8px, scale down with length
939
+ const elements = body.getElementsByTagName('*');
940
+ for (let elem of elements) {
941
+ elem.style.fontSize = `${adjustedFontSize}px`;
942
+ }
943
+ console.log(`Adjusted font size to ${adjustedFontSize}px for ${textLength} characters on ${screenWidth}px width`);
944
+ }
945
+ };
946
+ });
947
+ } else {
948
+ console.error("Iframe not found");
949
+ }
950
+ } else {
951
+ const iframe = document.getElementById('slide-iframe');
952
+ if (iframe) {
953
+ iframe.src = "about:blank";
954
+ console.log("No valid slide content for index:", currentSlide);
955
+ }
956
+ }
957
+ }
958
+
959
+ function updateSlide(callback) {
960
+ console.log("Updating slide to index:", currentSlide);
961
+ renderSlide();
962
+ // Pause and reset all audio elements
963
+ audioElements.forEach(audio => {
964
+ if (audio && audio.pause) {
965
+ audio.pause();
966
+ audio.currentTime = 0;
967
+ audio.style.border = 'none'; // Reset border
968
+ console.log("Paused and reset audio:", audio.id);
969
+ }
970
+ });
971
+ // Wait briefly to ensure pause completes before proceeding
972
+ setTimeout(() => {
973
+ if (callback) callback();
974
+ }, 100);
975
+ }
976
+
977
+ function updateAudioSources(audioUrls) {
978
+ console.log("Updating audio sources:", audioUrls);
979
+ audioUrls.forEach((url, index) => {
980
+ const audio = audioElements[index];
981
+ if (audio && url && audio.src !== url) {
982
+ audio.src = url;
983
+ audio.load();
984
+ console.log(`Updated audio-${index+1} src to:`, url);
985
+ } else if (!audio) {
986
+ console.error(`Audio element at index ${index} not found`);
987
+ }
988
+ });
989
+ }
990
+
991
+ function prevSlide() {
992
+ console.log("Previous button clicked, current slide:", currentSlide);
993
+ hasNavigated = true; // User has navigated
994
+ if (currentSlide > 0) {
995
+ currentSlide--;
996
+ updateSlide(() => {
997
+ const audio = audioElements[currentSlide];
998
+ if (audio && audio.play && isPlaying) {
999
+ audio.style.border = '50px solid #50f150';
1000
+ audio.style.borderRadius = '50px';
1001
+ audio.play().catch(e => console.error('Audio play failed:', e));
1002
+ }
1003
+ });
1004
+ } else {
1005
+ console.log("Already at first slide");
1006
+ }
1007
+ }
1008
+
1009
+ function nextSlide() {
1010
+ console.log("Next button clicked, current slide:", currentSlide);
1011
+ hasNavigated = true; // User has navigated
1012
+ if (currentSlide < totalSlides - 1) {
1013
+ currentSlide++;
1014
+ updateSlide(() => {
1015
+ const audio = audioElements[currentSlide];
1016
+ if (audio && audio.play && isPlaying) {
1017
+ audio.style.border = '2px solid lightgreen';
1018
+ audio.play().catch(e => console.error('Audio play failed:', e));
1019
+ }
1020
+ });
1021
+ } else {
1022
+ console.log("Already at last slide");
1023
+ }
1024
+ }
1025
+
1026
+ function playAll() {
1027
+ console.log("Play button clicked, isPlaying:", isPlaying);
1028
+ const playBtn = document.getElementById('play-btn');
1029
+ if (!playBtn) {
1030
+ console.error("Play button not found");
1031
+ return;
1032
+ }
1033
+ const playIcon = playBtn.querySelector('i');
1034
+ if (playIcon.className.includes('fa-pause')) {
1035
+ // Pause playback
1036
+ isPlaying = false;
1037
+ audioElements.forEach(audio => {
1038
+ if (audio && audio.pause) {
1039
+ audio.pause();
1040
+ audio.currentTime = 0;
1041
+ audio.style.border = 'none';
1042
+ console.log("Paused audio:", audio.id);
1043
+ }
1044
+ });
1045
+ playIcon.className = 'fas fa-play';
1046
+ return;
1047
+ }
1048
+ // Start playback
1049
+ currentSlide = 0;
1050
+ let index = 0;
1051
+ isPlaying = true;
1052
+ playIcon.className = 'fas fa-pause';
1053
+ updateSlide(() => {
1054
+ function playNext() {
1055
+ if (index >= totalSlides || !isPlaying) {
1056
+ isPlaying = false;
1057
+ playIcon.className = 'fas fa-play';
1058
+ audioElements.forEach(audio => {
1059
+ if (audio) audio.style.border = 'none';
1060
+ });
1061
+ console.log("Finished playing all slides or paused");
1062
+ return;
1063
+ }
1064
+ currentSlide = index;
1065
+ updateSlide(() => {
1066
+ const audio = audioElements[index];
1067
+ if (audio && audio.play) {
1068
+ // Highlight the current audio element
1069
+ audioElements.forEach(a => a.style.border = 'none');
1070
+ audio.style.border = '2px solid lightgreen';
1071
+ console.log(`Attempting to play audio for slide ${index + 1}`);
1072
+ audio.play().then(() => {
1073
+ console.log(`Playing audio for slide ${index + 1}`);
1074
+ // Remove any existing ended listeners to prevent duplicates
1075
+ audio.onended = null;
1076
+ audio.addEventListener('ended', () => {
1077
+ console.log(`Audio ended for slide ${index + 1}`);
1078
+ index++;
1079
+ playNext();
1080
+ }, { once: true });
1081
+ // Fallback: Check if audio is stuck (e.g., duration not advancing)
1082
+ const checkDuration = setInterval(() => {
1083
+ if (!isPlaying) {
1084
+ clearInterval(checkDuration);
1085
+ return;
1086
+ }
1087
+ if (audio.duration && audio.currentTime >= audio.duration - 0.1) {
1088
+ console.log(`Fallback: Audio for slide ${index + 1} considered ended`);
1089
+ clearInterval(checkDuration);
1090
+ audio.onended = null; // Prevent duplicate triggers
1091
+ index++;
1092
+ playNext();
1093
+ }
1094
+ }, 1000);
1095
+ }).catch(e => {
1096
+ console.error(`Audio play failed for slide ${index + 1}:`, e);
1097
+ // Retry playing the same slide after a short delay
1098
+ setTimeout(() => {
1099
+ audio.play().then(() => {
1100
+ console.log(`Retry succeeded for slide ${index + 1}`);
1101
+ audio.onended = null;
1102
+ audio.addEventListener('ended', () => {
1103
+ console.log(`Audio ended for slide ${index + 1}`);
1104
+ index++;
1105
+ playNext();
1106
+ }, { once: true });
1107
+ const checkDuration = setInterval(() => {
1108
+ if (!isPlaying) {
1109
+ clearInterval(checkDuration);
1110
+ return;
1111
+ }
1112
+ if (audio.duration && audio.currentTime >= audio.duration - 0.1) {
1113
+ console.log(`Fallback: Audio for slide ${index + 1} considered ended`);
1114
+ clearInterval(checkDuration);
1115
+ audio.onended = null;
1116
+ index++;
1117
+ playNext();
1118
+ }
1119
+ }, 1000);
1120
+ }).catch(e => {
1121
+ console.error(`Retry failed for slide ${index + 1}:`, e);
1122
+ index++; // Move to next slide if retry fails
1123
+ playNext();
1124
+ });
1125
+ }, 500);
1126
+ });
1127
+ } else {
1128
+ index++;
1129
+ playNext();
1130
+ }
1131
+ });
1132
+ }
1133
+ playNext();
1134
+ });
1135
+ }
1136
+
1137
+ function toggleFullScreen() {
1138
+ console.log("Fullscreen button clicked");
1139
+ const container = document.getElementById('lecture-container');
1140
+ if (!container) {
1141
+ console.error("Lecture container not found");
1142
+ return;
1143
+ }
1144
+ if (!document.fullscreenElement) {
1145
+ container.requestFullscreen().catch(err => {
1146
+ console.error('Error enabling full-screen:', err);
1147
+ });
1148
+ } else {
1149
+ document.exitFullscreen();
1150
+ console.log("Exited fullscreen");
1151
+ }
1152
+ }
1153
+
1154
+ // Attach event listeners
1155
+ waitForElement('#prev-btn', (prevBtn) => {
1156
+ prevBtn.addEventListener('click', prevSlide);
1157
+ console.log("Attached event listener to prev-btn");
1158
+ });
1159
+
1160
+ waitForElement('#play-btn', (playBtn) => {
1161
+ playBtn.addEventListener('click', playAll);
1162
+ console.log("Attached event listener to play-btn");
1163
+ });
1164
+
1165
+ waitForElement('#next-btn', (nextBtn) => {
1166
+ nextBtn.addEventListener('click', nextSlide);
1167
+ console.log("Attached event listener to next-btn");
1168
+ });
1169
+
1170
+ waitForElement('#fullscreen-btn', (fullscreenBtn) => {
1171
+ fullscreenBtn.addEventListener('click', toggleFullScreen);
1172
+ console.log("Attached event listener to fullscreen-btn");
1173
+ });
1174
+
1175
+ // Initialize audio sources and render first slide
1176
+ updateAudioSources(lectureData.audioFiles);
1177
+ renderSlide();
1178
+ console.log("Initial slide rendered, starting at slide:", currentSlide + 1);
1179
+ });
1180
+ });
1181
+ }
1182
+
1183
+ // Observe DOM changes to detect when lecture container is added
1184
+ const observer = new MutationObserver((mutations) => {
1185
+ mutations.forEach((mutation) => {
1186
+ if (mutation.addedNodes.length) {
1187
+ const lectureContainer = document.getElementById('lecture-container');
1188
+ if (lectureContainer) {
1189
+ console.log("Lecture container detected in DOM");
1190
+ observer.disconnect(); // Stop observing once found
1191
+ initializeSlides();
1192
+ }
1193
+ }
1194
+ });
1195
+ });
1196
+
1197
+ // Start observing the document body for changes
1198
+ observer.observe(document.body, { childList: true, subtree: true });
1199
+ console.log("Started observing DOM for lecture container");
1200
+ }
1201
+ """
1202
+
1203
  # Gradio interface
1204
+ with gr.Blocks(
1205
+ title="Agent Feynman",
1206
+ css="""
1207
+ #lecture-container {font-family: 'Times New Roman', Times, serif;}
1208
+ #slide-content {font-size: 48px; line-height: 1.2;}
1209
+ #form-group {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; font-weight: 900; color: #000; background-color: white;}
1210
+ #download {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px;}
1211
+ #slide-display {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; background-color: white;}
1212
+ button {transition: background-color 0.3s;}
1213
+ button:hover {background-color: #e0e0e0;}
1214
+ """,
1215
+ js=js_code,
1216
+ head='<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">'
1217
+ ) as demo:
1218
+ gr.Markdown("""
1219
+ # <center>Professor AI Feynman: A Multi-Agent Tool for Learning Anything the Feynman way.</center>
1220
+ ## <center>(Jaward Sesay - Microsoft AI Agent Hackathon Submission)</center>""")
1221
  with gr.Row():
1222
  with gr.Column(scale=1):
1223
+ with gr.Group(elem_id="form-group"):
1224
  title = gr.Textbox(label="Lecture Title", placeholder="e.g. Introduction to AI")
1225
  lecture_content_description = gr.Textbox(label="Lecture Content Description", placeholder="e.g. Focus on recent advancements")
1226
  lecture_type = gr.Dropdown(["Conference", "University", "High school"], label="Audience", value="University")
 
1229
  "OpenAI-gpt-4o-2024-08-06",
1230
  "Anthropic-claude-3-sonnet-20240229",
1231
  "Google-gemini-1.5-flash",
1232
+ "Ollama-llama3.2",
1233
+ "Azure AI Foundry"
1234
  ],
1235
  label="Model",
1236
  value="Google-gemini-1.5-flash"
1237
  )
1238
+ api_key = gr.Textbox(label="Model Provider API Key", type="password", placeholder="Not required for Ollama or Azure AI Foundry (use GITHUB_TOKEN env var)")
1239
+ serpapi_key = gr.Textbox(label="SerpApi Key (For Research Agent)", type="password", placeholder="Enter your SerpApi key (optional)")
1240
+ num_slides = gr.Slider(1, 20, step=1, label="Number of Content Slides", value=3)
1241
+ speaker_audio = gr.Audio(label="Speaker sample speech (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
1242
  generate_btn = gr.Button("Generate Lecture")
1243
  with gr.Column(scale=2):
1244
  default_slide_html = """
1245
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 30px; box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important;">
1246
+ <h2 style="font-style: italic; color: #000;">Waiting for lecture content...</h2>
1247
  <p style="margin-top: 10px; font-size: 16px;">Please Generate lecture content via the form on the left first before lecture begins</p>
1248
  </div>
1249
  """
1250
+ slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html, elem_id="slide-display")
1251
+ file_output = gr.File(label="Download Lecture Materials", elem_id="download")
 
1252
 
1253
  speaker_audio.change(
1254
  fn=update_audio_preview,
 
1259
  generate_btn.click(
1260
  fn=on_generate,
1261
  inputs=[api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides],
1262
+ outputs=[slide_display, file_output]
1263
  )
1264
 
1265
  if __name__ == "__main__":