Jaward committed on
Commit
e76ee28
·
verified ·
1 Parent(s): c8e2a18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +292 -137
app.py CHANGED
@@ -33,15 +33,22 @@ logging.basicConfig(
33
  logger = logging.getLogger(__name__)
34
 
35
  # Set up environment
36
- OUTPUT_DIR = os.path.join(os.getcwd(), "outputs") # Fallback for local dev
 
 
 
37
  os.makedirs(OUTPUT_DIR, exist_ok=True)
38
  logger.info(f"Using output directory: {OUTPUT_DIR}")
39
  os.environ["COQUI_TOS_AGREED"] = "1"
40
 
41
- device = "cuda" if torch.cuda.is_available() else "cpu"
42
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
43
- logger.info("TTS model initialized on %s", device)
44
-
 
 
 
 
45
 
46
  # Define Pydantic model for slide data
47
  class Slide(BaseModel):
@@ -394,6 +401,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
394
  label = "Research: in progress..."
395
  yield (
396
  html_with_progress(label, progress),
 
 
397
  []
398
  )
399
  await asyncio.sleep(0.1)
@@ -440,6 +449,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
440
  label = "Slides: generating..."
441
  yield (
442
  html_with_progress(label, progress),
 
 
443
  []
444
  )
445
  await asyncio.sleep(0.1)
@@ -465,6 +476,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
465
  label = "Scripts: generating..."
466
  yield (
467
  html_with_progress(label, progress),
 
 
468
  []
469
  )
470
  await asyncio.sleep(0.1)
@@ -479,6 +492,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
479
  label = "Review: in progress..."
480
  yield (
481
  html_with_progress(label, progress),
 
 
482
  []
483
  )
484
  await asyncio.sleep(0.1)
@@ -489,6 +504,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
489
  label = "Slides: generating..."
490
  yield (
491
  html_with_progress(label, progress),
 
 
492
  []
493
  )
494
  await asyncio.sleep(0.1)
@@ -522,6 +539,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
522
  label = "Scripts: generating..."
523
  yield (
524
  html_with_progress(label, progress),
 
 
525
  []
526
  )
527
  await asyncio.sleep(0.1)
@@ -556,6 +575,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
556
  label = "Scripts generated and saved. Reviewing..."
557
  yield (
558
  html_with_progress(label, progress),
 
 
559
  []
560
  )
561
  await asyncio.sleep(0.1)
@@ -578,6 +599,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
578
  label = "Lecture materials ready. Generating audio..."
579
  yield (
580
  html_with_progress(label, progress),
 
 
581
  []
582
  )
583
  await asyncio.sleep(0.1)
@@ -593,12 +616,9 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
593
  source = getattr(msg, 'source', getattr(msg, 'sender', None))
594
  logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
595
  yield (
596
- f"""
597
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
598
- <h2 style="color: #d9534f;">{error_message}</h2>
599
- <p style="margin-top: 20px;">Please try again with a different model or adjust your inputs.</p>
600
- </div>
601
- """,
602
  []
603
  )
604
  return
@@ -612,6 +632,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
612
  <p style="margin-top: 20px;">Expected {total_slides} slides, but generated {len(slides)}. Please try again.</p>
613
  </div>
614
  """,
 
 
615
  []
616
  )
617
  return
@@ -625,6 +647,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
625
  <p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
626
  </div>
627
  """,
 
 
628
  []
629
  )
630
  return
@@ -638,6 +662,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
638
  <p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
639
  </div>
640
  """,
 
 
641
  []
642
  )
643
  return
@@ -652,127 +678,41 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
652
  <p style="margin-top: 20px;">Please try again.</p>
653
  </div>
654
  """,
 
 
655
  []
656
  )
657
  return
658
 
659
- audio_files = []
660
- audio_urls = []
661
- validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
662
- if not validated_speaker_wav:
663
- logger.error("Invalid speaker audio after conversion, skipping TTS")
664
- yield (
665
- f"""
666
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
667
- <h2 style="color: #d9534f;">Invalid speaker audio</h2>
668
- <p style="margin-top: 20px;">Please upload a valid MP3 or WAV audio file and try again.</p>
669
- </div>
670
- """,
671
- []
672
- )
673
- return
674
-
675
- for i, script in enumerate(scripts):
676
- cleaned_script = clean_script_text(script)
677
- audio_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}.mp3")
678
- script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_script.txt")
679
-
680
- try:
681
- with open(script_file, "w", encoding="utf-8") as f:
682
- f.write(cleaned_script or "")
683
- logger.info("Saved script to %s: %s", script_file, cleaned_script)
684
- except Exception as e:
685
- logger.error("Error saving script to %s: %s", script_file, str(e))
686
-
687
- if not cleaned_script:
688
- logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
689
- audio_files.append(None)
690
- audio_urls.append(None)
691
- progress = 90 + ((i + 1) / len(scripts)) * 10
692
- label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
693
- yield (
694
- html_with_progress(label, progress),
695
- []
696
- )
697
- await asyncio.sleep(0.1)
698
- continue
699
-
700
- max_audio_retries = 2
701
- for attempt in range(max_audio_retries + 1):
702
- try:
703
- current_text = cleaned_script
704
- if attempt > 0:
705
- sentences = re.split(r"[.!?]+", cleaned_script)
706
- sentences = [s.strip() for s in sentences if s.strip()][:2]
707
- current_text = ". ".join(sentences) + "."
708
- logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
709
-
710
- success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
711
- if not success:
712
- raise RuntimeError("TTS generation failed")
713
-
714
- logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
715
- audio_files.append(audio_file)
716
- # Use Gradio's file serving URL
717
- audio_urls.append(f"/gradio_api/file={audio_file}")
718
- progress = 90 + ((i + 1) / len(scripts)) * 10
719
- label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
720
- yield (
721
- html_with_progress(label, progress),
722
- []
723
- )
724
- await asyncio.sleep(0.1)
725
- break
726
- except Exception as e:
727
- logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
728
- if attempt == max_audio_retries:
729
- logger.error("Max retries reached for slide %d, skipping", i + 1)
730
- audio_files.append(None)
731
- audio_urls.append(None)
732
- progress = 90 + ((i + 1) / len(scripts)) * 10
733
- label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
734
- yield (
735
- html_with_progress(label, progress),
736
- []
737
- )
738
- await asyncio.sleep(0.1)
739
- break
740
-
741
  # Collect .txt files for download
742
  txt_files = [f for f in os.listdir(OUTPUT_DIR) if f.endswith('.txt')]
743
  txt_files.sort() # Sort for consistent display
744
  txt_file_paths = [os.path.join(OUTPUT_DIR, f) for f in txt_files]
745
 
746
- # Generate audio timeline with playable audio elements
 
747
  audio_timeline = ""
748
- for i, audio_url in enumerate(audio_urls):
749
- if audio_url:
750
- audio_timeline += f'<audio id="audio-{i+1}" controls src="{audio_url}" style="display: inline-block; margin: 0 10px; width: 200px;"></audio>'
751
- else:
752
- audio_timeline += f'<span id="audio-{i+1}" style="display: inline-block; margin: 0 10px;">slide_{i+1}.mp3 (not generated)</span>'
753
 
754
- slides_info = json.dumps({"slides": markdown_slides, "audioFiles": audio_urls})
755
-
756
- html_output = f"""
757
- <script src="https://cdn.jsdelivr.net/npm/[email protected]/marked.min.js"></script>
758
  <div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
759
- <div id="slide-content" style="flex: 1; overflow: auto; padding: 20px; text-align: center; background-color: #fff; color: #333;">
760
- <!-- Slides will be rendered here -->
761
- </div>
762
  <div style="padding: 20px; text-align: center;">
763
- <div style="display: flex; justify-content: center; margin-bottom: 10px;">
764
  {audio_timeline}
765
  </div>
766
  <div style="display: flex; justify-content: center; margin-bottom: 10px;">
767
  <button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
768
  <button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
769
  <button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
770
- <button style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">☐</button>
771
  </div>
772
  </div>
773
  </div>
774
  <script>
775
- const lectureData = {slides_info};
776
  let currentSlide = 0;
777
  const totalSlides = lectureData.slides.length;
778
  let audioElements = [];
@@ -783,26 +723,15 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
783
  audioElements.push(audio);
784
  }}
785
 
786
- function renderSlide() {{
787
- const slideContent = document.getElementById('slide-content');
788
- if (lectureData.slides[currentSlide]) {{
789
- const markdownText = lectureData.slides[currentSlide];
790
- const htmlContent = marked.parse(markdownText);
791
- slideContent.innerHTML = htmlContent;
792
- console.log("Rendering slide:", markdownText);
793
- console.log("Rendered HTML:", htmlContent);
794
- }} else {{
795
- slideContent.innerHTML = '<h2>No slide content available</h2>';
796
- console.log("No slide content for index:", currentSlide);
797
- }}
798
- }}
799
-
800
- function updateSlide() {{
801
- renderSlide();
802
- audioElements.forEach(audio => {{
803
  if (audio && audio.pause) {{
804
  audio.pause();
805
  audio.currentTime = 0;
 
 
 
806
  }}
807
  }});
808
  }}
@@ -810,23 +739,25 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
810
  function prevSlide() {{
811
  if (currentSlide > 0) {{
812
  currentSlide--;
813
- updateSlide();
814
  }}
815
  }}
816
 
817
  function nextSlide() {{
818
  if (currentSlide < totalSlides - 1) {{
819
  currentSlide++;
820
- updateSlide();
821
  }}
822
  }}
823
 
824
  function playAll() {{
825
- let index = 0;
826
  function playNext() {{
827
  if (index >= totalSlides) return;
 
 
828
  const audio = audioElements[index];
829
- if (audio && audio.play) {{
830
  audio.play().then(() => {{
831
  audio.addEventListener('ended', () => {{
832
  index++;
@@ -845,20 +776,226 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
845
  playNext();
846
  }}
847
 
 
 
 
 
 
 
 
 
 
 
 
848
  // Attach event listeners
849
  document.getElementById('prev-btn').addEventListener('click', prevSlide);
850
  document.getElementById('play-btn').addEventListener('click', playAll);
851
  document.getElementById('next-btn').addEventListener('click', nextSlide);
 
852
 
853
  // Initialize first slide
854
- renderSlide();
855
  </script>
856
  """
857
- logger.info("Lecture generation completed successfully")
858
  yield (
859
- html_output,
860
- txt_file_paths
 
 
861
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
862
 
863
  except Exception as e:
864
  logger.error("Error during lecture generation: %s\n%s", str(e), traceback.format_exc())
@@ -870,6 +1007,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
870
  <p style="margin-top: 20px;">Please try again or adjust your inputs.</p>
871
  </div>
872
  """,
 
 
873
  []
874
  )
875
  return
@@ -905,7 +1044,8 @@ with gr.Blocks(title="Agent Feynman") as demo:
905
  <p style="margin-top: 10px; font-size: 16px;">Please Generate lecture content via the form on the left first before lecture begins</p>
906
  </div>
907
  """
908
- slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html)
 
909
  file_output = gr.File(label="Download Generated Files")
910
 
911
  speaker_audio.change(
@@ -914,11 +1054,26 @@ with gr.Blocks(title="Agent Feynman") as demo:
914
  outputs=speaker_audio
915
  )
916
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
917
  generate_btn.click(
918
  fn=on_generate,
919
  inputs=[api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides],
920
- outputs=[slide_display, file_output]
921
  )
922
 
923
  if __name__ == "__main__":
924
- demo.launch(allowed_paths=[OUTPUT_DIR], max_file_size="5mb")
 
33
  logger = logging.getLogger(__name__)
34
 
35
  # Set up environment
36
+ if os.path.exists("/tmp"):
37
+ OUTPUT_DIR = "/tmp/outputs" # Use /tmp for Huggingface Spaces
38
+ else:
39
+ OUTPUT_DIR = os.path.join(os.getcwd(), "outputs") # Fallback for local dev
40
  os.makedirs(OUTPUT_DIR, exist_ok=True)
41
  logger.info(f"Using output directory: {OUTPUT_DIR}")
42
  os.environ["COQUI_TOS_AGREED"] = "1"
43
 
44
+ # Initialize TTS model at the top
45
+ try:
46
+ device = "cuda" if torch.cuda.is_available() else "cpu"
47
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
48
+ logger.info("TTS model initialized on %s", device)
49
+ except Exception as e:
50
+ logger.error("Failed to initialize TTS model: %s", str(e))
51
+ tts = None
52
 
53
  # Define Pydantic model for slide data
54
  class Slide(BaseModel):
 
401
  label = "Research: in progress..."
402
  yield (
403
  html_with_progress(label, progress),
404
+ [],
405
+ "",
406
  []
407
  )
408
  await asyncio.sleep(0.1)
 
449
  label = "Slides: generating..."
450
  yield (
451
  html_with_progress(label, progress),
452
+ [],
453
+ "",
454
  []
455
  )
456
  await asyncio.sleep(0.1)
 
476
  label = "Scripts: generating..."
477
  yield (
478
  html_with_progress(label, progress),
479
+ [],
480
+ "",
481
  []
482
  )
483
  await asyncio.sleep(0.1)
 
492
  label = "Review: in progress..."
493
  yield (
494
  html_with_progress(label, progress),
495
+ [],
496
+ "",
497
  []
498
  )
499
  await asyncio.sleep(0.1)
 
504
  label = "Slides: generating..."
505
  yield (
506
  html_with_progress(label, progress),
507
+ [],
508
+ "",
509
  []
510
  )
511
  await asyncio.sleep(0.1)
 
539
  label = "Scripts: generating..."
540
  yield (
541
  html_with_progress(label, progress),
542
+ [],
543
+ "",
544
  []
545
  )
546
  await asyncio.sleep(0.1)
 
575
  label = "Scripts generated and saved. Reviewing..."
576
  yield (
577
  html_with_progress(label, progress),
578
+ [],
579
+ "",
580
  []
581
  )
582
  await asyncio.sleep(0.1)
 
599
  label = "Lecture materials ready. Generating audio..."
600
  yield (
601
  html_with_progress(label, progress),
602
+ [],
603
+ "",
604
  []
605
  )
606
  await asyncio.sleep(0.1)
 
616
  source = getattr(msg, 'source', getattr(msg, 'sender', None))
617
  logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
618
  yield (
619
+ error_html,
620
+ [],
621
+ "",
 
 
 
622
  []
623
  )
624
  return
 
632
  <p style="margin-top: 20px;">Expected {total_slides} slides, but generated {len(slides)}. Please try again.</p>
633
  </div>
634
  """,
635
+ [],
636
+ "",
637
  []
638
  )
639
  return
 
647
  <p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
648
  </div>
649
  """,
650
+ [],
651
+ "",
652
  []
653
  )
654
  return
 
662
  <p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
663
  </div>
664
  """,
665
+ [],
666
+ "",
667
  []
668
  )
669
  return
 
678
  <p style="margin-top: 20px;">Please try again.</p>
679
  </div>
680
  """,
681
+ [],
682
+ "",
683
  []
684
  )
685
  return
686
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
687
  # Collect .txt files for download
688
  txt_files = [f for f in os.listdir(OUTPUT_DIR) if f.endswith('.txt')]
689
  txt_files.sort() # Sort for consistent display
690
  txt_file_paths = [os.path.join(OUTPUT_DIR, f) for f in txt_files]
691
 
692
+ # Initialize audio timeline placeholders
693
+ audio_urls = [None] * len(scripts)
694
  audio_timeline = ""
695
+ for i in range(len(scripts)):
696
+ audio_timeline += f'<audio id="audio-{i+1}" controls style="display: inline-block; margin: 0 10px; width: 200px;"><source src="" type="audio/mpeg"></audio>'
 
 
 
697
 
698
+ # Display lecture materials immediately
699
+ slides_json = json.dumps({"slides": markdown_slides, "audioFiles": audio_urls})
700
+ html_controls = f"""
 
701
  <div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
 
 
 
702
  <div style="padding: 20px; text-align: center;">
703
+ <div id="audio-timeline" style="display: flex; justify-content: center; margin-bottom: 10px;">
704
  {audio_timeline}
705
  </div>
706
  <div style="display: flex; justify-content: center; margin-bottom: 10px;">
707
  <button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
708
  <button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
709
  <button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
710
+ <button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">🖥️</button>
711
  </div>
712
  </div>
713
  </div>
714
  <script>
715
+ const lectureData = {slides_json};
716
  let currentSlide = 0;
717
  const totalSlides = lectureData.slides.length;
718
  let audioElements = [];
 
723
  audioElements.push(audio);
724
  }}
725
 
726
+ function updateSlideDisplay() {{
727
+ window.updateSlideContent(lectureData.slides[currentSlide]);
728
+ audioElements.forEach((audio, index) => {{
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729
  if (audio && audio.pause) {{
730
  audio.pause();
731
  audio.currentTime = 0;
732
+ if (index === currentSlide && audio.src) {{
733
+ audio.play().catch(e => console.error('Audio play failed:', e));
734
+ }}
735
  }}
736
  }});
737
  }}
 
739
  function prevSlide() {{
740
  if (currentSlide > 0) {{
741
  currentSlide--;
742
+ updateSlideDisplay();
743
  }}
744
  }}
745
 
746
  function nextSlide() {{
747
  if (currentSlide < totalSlides - 1) {{
748
  currentSlide++;
749
+ updateSlideDisplay();
750
  }}
751
  }}
752
 
753
  function playAll() {{
754
+ let index = currentSlide;
755
  function playNext() {{
756
  if (index >= totalSlides) return;
757
+ currentSlide = index;
758
+ updateSlideDisplay();
759
  const audio = audioElements[index];
760
+ if (audio && audio.src) {{
761
  audio.play().then(() => {{
762
  audio.addEventListener('ended', () => {{
763
  index++;
 
776
  playNext();
777
  }}
778
 
779
+ function toggleFullScreen() {{
780
+ const container = document.getElementById('lecture-container');
781
+ if (!document.fullscreenElement) {{
782
+ container.requestFullscreen().catch(err => {{
783
+ console.error(`Error attempting to enable full-screen mode: ${{err.message}}`);
784
+ }});
785
+ }} else {{
786
+ document.exitFullscreen();
787
+ }}
788
+ }}
789
+
790
  // Attach event listeners
791
  document.getElementById('prev-btn').addEventListener('click', prevSlide);
792
  document.getElementById('play-btn').addEventListener('click', playAll);
793
  document.getElementById('next-btn').addEventListener('click', nextSlide);
794
+ document.getElementById('fullscreen-btn').addEventListener('click', toggleFullScreen);
795
 
796
  // Initialize first slide
797
+ updateSlideDisplay();
798
  </script>
799
  """
 
800
  yield (
801
+ html_controls,
802
+ txt_file_paths,
803
+ markdown_slides[0],
804
+ []
805
  )
806
+
807
+ # Audio generation
808
+ audio_files = []
809
+ validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
810
+ if not validated_speaker_wav:
811
+ logger.error("Invalid speaker audio after conversion, skipping TTS")
812
+ yield (
813
+ html_controls,
814
+ txt_file_paths,
815
+ markdown_slides[0],
816
+ []
817
+ )
818
+ return
819
+
820
+ for i, script in enumerate(scripts):
821
+ cleaned_script = clean_script_text(script)
822
+ audio_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}.mp3")
823
+ script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_script.txt")
824
+
825
+ try:
826
+ with open(script_file, "w", encoding="utf-8") as f:
827
+ f.write(cleaned_script or "")
828
+ logger.info("Saved script to %s: %s", script_file, cleaned_script)
829
+ except Exception as e:
830
+ logger.error("Error saving script to %s: %s", script_file, str(e))
831
+
832
+ if not cleaned_script:
833
+ logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
834
+ audio_files.append(None)
835
+ audio_urls[i] = None
836
+ progress = 90 + ((i + 1) / len(scripts)) * 10
837
+ label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
838
+ yield (
839
+ html_controls,
840
+ txt_file_paths,
841
+ markdown_slides[currentSlide if 'currentSlide' in locals() else 0],
842
+ []
843
+ )
844
+ await asyncio.sleep(0.1)
845
+ continue
846
+
847
+ max_audio_retries = 2
848
+ for attempt in range(max_audio_retries + 1):
849
+ try:
850
+ current_text = cleaned_script
851
+ if attempt > 0:
852
+ sentences = re.split(r"[.!?]+", cleaned_script)
853
+ sentences = [s.strip() for s in sentences if s.strip()][:2]
854
+ current_text = ". ".join(sentences) + "."
855
+ logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
856
+
857
+ success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
858
+ if not success:
859
+ raise RuntimeError("TTS generation failed")
860
+
861
+ logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
862
+ audio_files.append(audio_file)
863
+ audio_urls[i] = f"/gradio_api/file={audio_file}"
864
+ # Update the audio element's src
865
+ audio_timeline = ""
866
+ for j, url in enumerate(audio_urls):
867
+ if url:
868
+ audio_timeline += f'<audio id="audio-{j+1}" controls src="{url}" style="display: inline-block; margin: 0 10px; width: 200px;"></audio>'
869
+ else:
870
+ audio_timeline += f'<audio id="audio-{j+1}" controls style="display: inline-block; margin: 0 10px; width: 200px;"><source src="" type="audio/mpeg"></audio>'
871
+ html_controls = f"""
872
+ <div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
873
+ <div style="padding: 20px; text-align: center;">
874
+ <div id="audio-timeline" style="display: flex; justify-content: center; margin-bottom: 10px;">
875
+ {audio_timeline}
876
+ </div>
877
+ <div style="display: flex; justify-content: center; margin-bottom: 10px;">
878
+ <button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
879
+ <button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
880
+ <button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
881
+ <button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">🖥️</button>
882
+ </div>
883
+ </div>
884
+ </div>
885
+ <script>
886
+ const lectureData = {json.dumps({"slides": markdown_slides, "audioFiles": audio_urls})};
887
+ let currentSlide = {currentSlide if 'currentSlide' in locals() else 0};
888
+ const totalSlides = lectureData.slides.length;
889
+ let audioElements = [];
890
+
891
+ // Populate audio elements
892
+ for (let i = 0; i < totalSlides; i++) {{
893
+ const audio = document.getElementById(`audio-${{i+1}}`);
894
+ audioElements.push(audio);
895
+ }}
896
+
897
+ function updateSlideDisplay() {{
898
+ window.updateSlideContent(lectureData.slides[currentSlide]);
899
+ audioElements.forEach((audio, index) => {{
900
+ if (audio && audio.pause) {{
901
+ audio.pause();
902
+ audio.currentTime = 0;
903
+ if (index === currentSlide && audio.src) {{
904
+ audio.play().catch(e => console.error('Audio play failed:', e));
905
+ }}
906
+ }}
907
+ }});
908
+ }}
909
+
910
+ function prevSlide() {{
911
+ if (currentSlide > 0) {{
912
+ currentSlide--;
913
+ updateSlideDisplay();
914
+ }}
915
+ }}
916
+
917
+ function nextSlide() {{
918
+ if (currentSlide < totalSlides - 1) {{
919
+ currentSlide++;
920
+ updateSlideDisplay();
921
+ }}
922
+ }}
923
+
924
+ function playAll() {{
925
+ let index = currentSlide;
926
+ function playNext() {{
927
+ if (index >= totalSlides) return;
928
+ currentSlide = index;
929
+ updateSlideDisplay();
930
+ const audio = audioElements[index];
931
+ if (audio && audio.src) {{
932
+ audio.play().then(() => {{
933
+ audio.addEventListener('ended', () => {{
934
+ index++;
935
+ playNext();
936
+ }}, {{ once: true }});
937
+ }}).catch(e => {{
938
+ console.error('Audio play failed:', e);
939
+ index++;
940
+ playNext();
941
+ }});
942
+ }} else {{
943
+ index++;
944
+ playNext();
945
+ }}
946
+ }}
947
+ playNext();
948
+ }}
949
+
950
+ function toggleFullScreen() {{
951
+ const container = document.getElementById('lecture-container');
952
+ if (!document.fullscreenElement) {{
953
+ container.requestFullscreen().catch(err => {{
954
+ console.error(`Error attempting to enable full-screen mode: ${{err.message}}`);
955
+ }});
956
+ }} else {{
957
+ document.exitFullscreen();
958
+ }}
959
+ }}
960
+
961
+ // Attach event listeners
962
+ document.getElementById('prev-btn').addEventListener('click', prevSlide);
963
+ document.getElementById('play-btn').addEventListener('click', playAll);
964
+ document.getElementById('next-btn').addEventListener('click', nextSlide);
965
+ document.getElementById('fullscreen-btn').addEventListener('click', toggleFullScreen);
966
+
967
+ // Initialize first slide
968
+ updateSlideDisplay();
969
+ </script>
970
+ """
971
+ progress = 90 + ((i + 1) / len(scripts)) * 10
972
+ label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
973
+ yield (
974
+ html_controls,
975
+ txt_file_paths,
976
+ markdown_slides[currentSlide if 'currentSlide' in locals() else 0],
977
+ []
978
+ )
979
+ await asyncio.sleep(0.1)
980
+ break
981
+ except Exception as e:
982
+ logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
983
+ if attempt == max_audio_retries:
984
+ logger.error("Max retries reached for slide %d, skipping", i + 1)
985
+ audio_files.append(None)
986
+ audio_urls[i] = None
987
+ progress = 90 + ((i + 1) / len(scripts)) * 10
988
+ label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
989
+ yield (
990
+ html_controls,
991
+ txt_file_paths,
992
+ markdown_slides[currentSlide if 'currentSlide' in locals() else 0],
993
+ []
994
+ )
995
+ await asyncio.sleep(0.1)
996
+ break
997
+
998
+ logger.info("Lecture generation completed successfully")
999
 
1000
  except Exception as e:
1001
  logger.error("Error during lecture generation: %s\n%s", str(e), traceback.format_exc())
 
1007
  <p style="margin-top: 20px;">Please try again or adjust your inputs.</p>
1008
  </div>
1009
  """,
1010
+ [],
1011
+ "",
1012
  []
1013
  )
1014
  return
 
1044
  <p style="margin-top: 10px; font-size: 16px;">Please Generate lecture content via the form on the left first before lecture begins</p>
1045
  </div>
1046
  """
1047
+ slide_display = gr.Markdown(label="Lecture Slides", value="Waiting for lecture content...")
1048
+ controls_display = gr.HTML(label="Controls", value=default_slide_html)
1049
  file_output = gr.File(label="Download Generated Files")
1050
 
1051
  speaker_audio.change(
 
1054
  outputs=speaker_audio
1055
  )
1056
 
1057
+ # JavaScript to update slide content dynamically
1058
+ demo.load(
1059
+ fn=None,
1060
+ inputs=None,
1061
+ outputs=None,
1062
+ _js="""
1063
+ () => {
1064
+ window.updateSlideContent = (content) => {
1065
+ document.querySelector('#slide-display textarea').value = content;
1066
+ document.querySelector('#slide-display').dispatchEvent(new Event('input'));
1067
+ };
1068
+ }
1069
+ """
1070
+ )
1071
+
1072
  generate_btn.click(
1073
  fn=on_generate,
1074
  inputs=[api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides],
1075
+ outputs=[controls_display, file_output, slide_display, gr.State()]
1076
  )
1077
 
1078
  if __name__ == "__main__":
1079
+ demo.launch(allowed_paths=[OUTPUT_DIR])