Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -33,15 +33,22 @@ logging.basicConfig(
|
|
33 |
logger = logging.getLogger(__name__)
|
34 |
|
35 |
# Set up environment
|
36 |
-
|
|
|
|
|
|
|
37 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
38 |
logger.info(f"Using output directory: {OUTPUT_DIR}")
|
39 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
45 |
|
46 |
# Define Pydantic model for slide data
|
47 |
class Slide(BaseModel):
|
@@ -394,6 +401,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
394 |
label = "Research: in progress..."
|
395 |
yield (
|
396 |
html_with_progress(label, progress),
|
|
|
|
|
397 |
[]
|
398 |
)
|
399 |
await asyncio.sleep(0.1)
|
@@ -440,6 +449,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
440 |
label = "Slides: generating..."
|
441 |
yield (
|
442 |
html_with_progress(label, progress),
|
|
|
|
|
443 |
[]
|
444 |
)
|
445 |
await asyncio.sleep(0.1)
|
@@ -465,6 +476,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
465 |
label = "Scripts: generating..."
|
466 |
yield (
|
467 |
html_with_progress(label, progress),
|
|
|
|
|
468 |
[]
|
469 |
)
|
470 |
await asyncio.sleep(0.1)
|
@@ -479,6 +492,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
479 |
label = "Review: in progress..."
|
480 |
yield (
|
481 |
html_with_progress(label, progress),
|
|
|
|
|
482 |
[]
|
483 |
)
|
484 |
await asyncio.sleep(0.1)
|
@@ -489,6 +504,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
489 |
label = "Slides: generating..."
|
490 |
yield (
|
491 |
html_with_progress(label, progress),
|
|
|
|
|
492 |
[]
|
493 |
)
|
494 |
await asyncio.sleep(0.1)
|
@@ -522,6 +539,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
522 |
label = "Scripts: generating..."
|
523 |
yield (
|
524 |
html_with_progress(label, progress),
|
|
|
|
|
525 |
[]
|
526 |
)
|
527 |
await asyncio.sleep(0.1)
|
@@ -556,6 +575,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
556 |
label = "Scripts generated and saved. Reviewing..."
|
557 |
yield (
|
558 |
html_with_progress(label, progress),
|
|
|
|
|
559 |
[]
|
560 |
)
|
561 |
await asyncio.sleep(0.1)
|
@@ -578,6 +599,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
578 |
label = "Lecture materials ready. Generating audio..."
|
579 |
yield (
|
580 |
html_with_progress(label, progress),
|
|
|
|
|
581 |
[]
|
582 |
)
|
583 |
await asyncio.sleep(0.1)
|
@@ -593,12 +616,9 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
593 |
source = getattr(msg, 'source', getattr(msg, 'sender', None))
|
594 |
logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
|
595 |
yield (
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
<p style="margin-top: 20px;">Please try again with a different model or adjust your inputs.</p>
|
600 |
-
</div>
|
601 |
-
""",
|
602 |
[]
|
603 |
)
|
604 |
return
|
@@ -612,6 +632,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
612 |
<p style="margin-top: 20px;">Expected {total_slides} slides, but generated {len(slides)}. Please try again.</p>
|
613 |
</div>
|
614 |
""",
|
|
|
|
|
615 |
[]
|
616 |
)
|
617 |
return
|
@@ -625,6 +647,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
625 |
<p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
|
626 |
</div>
|
627 |
""",
|
|
|
|
|
628 |
[]
|
629 |
)
|
630 |
return
|
@@ -638,6 +662,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
638 |
<p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
|
639 |
</div>
|
640 |
""",
|
|
|
|
|
641 |
[]
|
642 |
)
|
643 |
return
|
@@ -652,127 +678,41 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
652 |
<p style="margin-top: 20px;">Please try again.</p>
|
653 |
</div>
|
654 |
""",
|
|
|
|
|
655 |
[]
|
656 |
)
|
657 |
return
|
658 |
|
659 |
-
audio_files = []
|
660 |
-
audio_urls = []
|
661 |
-
validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
|
662 |
-
if not validated_speaker_wav:
|
663 |
-
logger.error("Invalid speaker audio after conversion, skipping TTS")
|
664 |
-
yield (
|
665 |
-
f"""
|
666 |
-
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
667 |
-
<h2 style="color: #d9534f;">Invalid speaker audio</h2>
|
668 |
-
<p style="margin-top: 20px;">Please upload a valid MP3 or WAV audio file and try again.</p>
|
669 |
-
</div>
|
670 |
-
""",
|
671 |
-
[]
|
672 |
-
)
|
673 |
-
return
|
674 |
-
|
675 |
-
for i, script in enumerate(scripts):
|
676 |
-
cleaned_script = clean_script_text(script)
|
677 |
-
audio_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}.mp3")
|
678 |
-
script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_script.txt")
|
679 |
-
|
680 |
-
try:
|
681 |
-
with open(script_file, "w", encoding="utf-8") as f:
|
682 |
-
f.write(cleaned_script or "")
|
683 |
-
logger.info("Saved script to %s: %s", script_file, cleaned_script)
|
684 |
-
except Exception as e:
|
685 |
-
logger.error("Error saving script to %s: %s", script_file, str(e))
|
686 |
-
|
687 |
-
if not cleaned_script:
|
688 |
-
logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
|
689 |
-
audio_files.append(None)
|
690 |
-
audio_urls.append(None)
|
691 |
-
progress = 90 + ((i + 1) / len(scripts)) * 10
|
692 |
-
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
693 |
-
yield (
|
694 |
-
html_with_progress(label, progress),
|
695 |
-
[]
|
696 |
-
)
|
697 |
-
await asyncio.sleep(0.1)
|
698 |
-
continue
|
699 |
-
|
700 |
-
max_audio_retries = 2
|
701 |
-
for attempt in range(max_audio_retries + 1):
|
702 |
-
try:
|
703 |
-
current_text = cleaned_script
|
704 |
-
if attempt > 0:
|
705 |
-
sentences = re.split(r"[.!?]+", cleaned_script)
|
706 |
-
sentences = [s.strip() for s in sentences if s.strip()][:2]
|
707 |
-
current_text = ". ".join(sentences) + "."
|
708 |
-
logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
|
709 |
-
|
710 |
-
success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
|
711 |
-
if not success:
|
712 |
-
raise RuntimeError("TTS generation failed")
|
713 |
-
|
714 |
-
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
715 |
-
audio_files.append(audio_file)
|
716 |
-
# Use Gradio's file serving URL
|
717 |
-
audio_urls.append(f"/gradio_api/file={audio_file}")
|
718 |
-
progress = 90 + ((i + 1) / len(scripts)) * 10
|
719 |
-
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
720 |
-
yield (
|
721 |
-
html_with_progress(label, progress),
|
722 |
-
[]
|
723 |
-
)
|
724 |
-
await asyncio.sleep(0.1)
|
725 |
-
break
|
726 |
-
except Exception as e:
|
727 |
-
logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
|
728 |
-
if attempt == max_audio_retries:
|
729 |
-
logger.error("Max retries reached for slide %d, skipping", i + 1)
|
730 |
-
audio_files.append(None)
|
731 |
-
audio_urls.append(None)
|
732 |
-
progress = 90 + ((i + 1) / len(scripts)) * 10
|
733 |
-
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
734 |
-
yield (
|
735 |
-
html_with_progress(label, progress),
|
736 |
-
[]
|
737 |
-
)
|
738 |
-
await asyncio.sleep(0.1)
|
739 |
-
break
|
740 |
-
|
741 |
# Collect .txt files for download
|
742 |
txt_files = [f for f in os.listdir(OUTPUT_DIR) if f.endswith('.txt')]
|
743 |
txt_files.sort() # Sort for consistent display
|
744 |
txt_file_paths = [os.path.join(OUTPUT_DIR, f) for f in txt_files]
|
745 |
|
746 |
-
#
|
|
|
747 |
audio_timeline = ""
|
748 |
-
for i
|
749 |
-
|
750 |
-
audio_timeline += f'<audio id="audio-{i+1}" controls src="{audio_url}" style="display: inline-block; margin: 0 10px; width: 200px;"></audio>'
|
751 |
-
else:
|
752 |
-
audio_timeline += f'<span id="audio-{i+1}" style="display: inline-block; margin: 0 10px;">slide_{i+1}.mp3 (not generated)</span>'
|
753 |
|
754 |
-
|
755 |
-
|
756 |
-
|
757 |
-
<script src="https://cdn.jsdelivr.net/npm/[email protected]/marked.min.js"></script>
|
758 |
<div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
|
759 |
-
<div id="slide-content" style="flex: 1; overflow: auto; padding: 20px; text-align: center; background-color: #fff; color: #333;">
|
760 |
-
<!-- Slides will be rendered here -->
|
761 |
-
</div>
|
762 |
<div style="padding: 20px; text-align: center;">
|
763 |
-
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
764 |
{audio_timeline}
|
765 |
</div>
|
766 |
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
767 |
<button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
|
768 |
<button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
|
769 |
<button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
|
770 |
-
<button style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;"
|
771 |
</div>
|
772 |
</div>
|
773 |
</div>
|
774 |
<script>
|
775 |
-
const lectureData = {
|
776 |
let currentSlide = 0;
|
777 |
const totalSlides = lectureData.slides.length;
|
778 |
let audioElements = [];
|
@@ -783,26 +723,15 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
783 |
audioElements.push(audio);
|
784 |
}}
|
785 |
|
786 |
-
function
|
787 |
-
|
788 |
-
|
789 |
-
const markdownText = lectureData.slides[currentSlide];
|
790 |
-
const htmlContent = marked.parse(markdownText);
|
791 |
-
slideContent.innerHTML = htmlContent;
|
792 |
-
console.log("Rendering slide:", markdownText);
|
793 |
-
console.log("Rendered HTML:", htmlContent);
|
794 |
-
}} else {{
|
795 |
-
slideContent.innerHTML = '<h2>No slide content available</h2>';
|
796 |
-
console.log("No slide content for index:", currentSlide);
|
797 |
-
}}
|
798 |
-
}}
|
799 |
-
|
800 |
-
function updateSlide() {{
|
801 |
-
renderSlide();
|
802 |
-
audioElements.forEach(audio => {{
|
803 |
if (audio && audio.pause) {{
|
804 |
audio.pause();
|
805 |
audio.currentTime = 0;
|
|
|
|
|
|
|
806 |
}}
|
807 |
}});
|
808 |
}}
|
@@ -810,23 +739,25 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
810 |
function prevSlide() {{
|
811 |
if (currentSlide > 0) {{
|
812 |
currentSlide--;
|
813 |
-
|
814 |
}}
|
815 |
}}
|
816 |
|
817 |
function nextSlide() {{
|
818 |
if (currentSlide < totalSlides - 1) {{
|
819 |
currentSlide++;
|
820 |
-
|
821 |
}}
|
822 |
}}
|
823 |
|
824 |
function playAll() {{
|
825 |
-
let index =
|
826 |
function playNext() {{
|
827 |
if (index >= totalSlides) return;
|
|
|
|
|
828 |
const audio = audioElements[index];
|
829 |
-
if (audio && audio.
|
830 |
audio.play().then(() => {{
|
831 |
audio.addEventListener('ended', () => {{
|
832 |
index++;
|
@@ -845,20 +776,226 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
845 |
playNext();
|
846 |
}}
|
847 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
848 |
// Attach event listeners
|
849 |
document.getElementById('prev-btn').addEventListener('click', prevSlide);
|
850 |
document.getElementById('play-btn').addEventListener('click', playAll);
|
851 |
document.getElementById('next-btn').addEventListener('click', nextSlide);
|
|
|
852 |
|
853 |
// Initialize first slide
|
854 |
-
|
855 |
</script>
|
856 |
"""
|
857 |
-
logger.info("Lecture generation completed successfully")
|
858 |
yield (
|
859 |
-
|
860 |
-
txt_file_paths
|
|
|
|
|
861 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
862 |
|
863 |
except Exception as e:
|
864 |
logger.error("Error during lecture generation: %s\n%s", str(e), traceback.format_exc())
|
@@ -870,6 +1007,8 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
870 |
<p style="margin-top: 20px;">Please try again or adjust your inputs.</p>
|
871 |
</div>
|
872 |
""",
|
|
|
|
|
873 |
[]
|
874 |
)
|
875 |
return
|
@@ -905,7 +1044,8 @@ with gr.Blocks(title="Agent Feynman") as demo:
|
|
905 |
<p style="margin-top: 10px; font-size: 16px;">Please Generate lecture content via the form on the left first before lecture begins</p>
|
906 |
</div>
|
907 |
"""
|
908 |
-
slide_display = gr.
|
|
|
909 |
file_output = gr.File(label="Download Generated Files")
|
910 |
|
911 |
speaker_audio.change(
|
@@ -914,11 +1054,26 @@ with gr.Blocks(title="Agent Feynman") as demo:
|
|
914 |
outputs=speaker_audio
|
915 |
)
|
916 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
917 |
generate_btn.click(
|
918 |
fn=on_generate,
|
919 |
inputs=[api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides],
|
920 |
-
outputs=[slide_display,
|
921 |
)
|
922 |
|
923 |
if __name__ == "__main__":
|
924 |
-
demo.launch(allowed_paths=[OUTPUT_DIR]
|
|
|
33 |
logger = logging.getLogger(__name__)
|
34 |
|
35 |
# Set up environment
|
36 |
+
if os.path.exists("/tmp"):
|
37 |
+
OUTPUT_DIR = "/tmp/outputs" # Use /tmp for Huggingface Spaces
|
38 |
+
else:
|
39 |
+
OUTPUT_DIR = os.path.join(os.getcwd(), "outputs") # Fallback for local dev
|
40 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
41 |
logger.info(f"Using output directory: {OUTPUT_DIR}")
|
42 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
43 |
|
44 |
+
# Initialize TTS model at the top
|
45 |
+
try:
|
46 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
47 |
+
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
48 |
+
logger.info("TTS model initialized on %s", device)
|
49 |
+
except Exception as e:
|
50 |
+
logger.error("Failed to initialize TTS model: %s", str(e))
|
51 |
+
tts = None
|
52 |
|
53 |
# Define Pydantic model for slide data
|
54 |
class Slide(BaseModel):
|
|
|
401 |
label = "Research: in progress..."
|
402 |
yield (
|
403 |
html_with_progress(label, progress),
|
404 |
+
[],
|
405 |
+
"",
|
406 |
[]
|
407 |
)
|
408 |
await asyncio.sleep(0.1)
|
|
|
449 |
label = "Slides: generating..."
|
450 |
yield (
|
451 |
html_with_progress(label, progress),
|
452 |
+
[],
|
453 |
+
"",
|
454 |
[]
|
455 |
)
|
456 |
await asyncio.sleep(0.1)
|
|
|
476 |
label = "Scripts: generating..."
|
477 |
yield (
|
478 |
html_with_progress(label, progress),
|
479 |
+
[],
|
480 |
+
"",
|
481 |
[]
|
482 |
)
|
483 |
await asyncio.sleep(0.1)
|
|
|
492 |
label = "Review: in progress..."
|
493 |
yield (
|
494 |
html_with_progress(label, progress),
|
495 |
+
[],
|
496 |
+
"",
|
497 |
[]
|
498 |
)
|
499 |
await asyncio.sleep(0.1)
|
|
|
504 |
label = "Slides: generating..."
|
505 |
yield (
|
506 |
html_with_progress(label, progress),
|
507 |
+
[],
|
508 |
+
"",
|
509 |
[]
|
510 |
)
|
511 |
await asyncio.sleep(0.1)
|
|
|
539 |
label = "Scripts: generating..."
|
540 |
yield (
|
541 |
html_with_progress(label, progress),
|
542 |
+
[],
|
543 |
+
"",
|
544 |
[]
|
545 |
)
|
546 |
await asyncio.sleep(0.1)
|
|
|
575 |
label = "Scripts generated and saved. Reviewing..."
|
576 |
yield (
|
577 |
html_with_progress(label, progress),
|
578 |
+
[],
|
579 |
+
"",
|
580 |
[]
|
581 |
)
|
582 |
await asyncio.sleep(0.1)
|
|
|
599 |
label = "Lecture materials ready. Generating audio..."
|
600 |
yield (
|
601 |
html_with_progress(label, progress),
|
602 |
+
[],
|
603 |
+
"",
|
604 |
[]
|
605 |
)
|
606 |
await asyncio.sleep(0.1)
|
|
|
616 |
source = getattr(msg, 'source', getattr(msg, 'sender', None))
|
617 |
logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
|
618 |
yield (
|
619 |
+
error_html,
|
620 |
+
[],
|
621 |
+
"",
|
|
|
|
|
|
|
622 |
[]
|
623 |
)
|
624 |
return
|
|
|
632 |
<p style="margin-top: 20px;">Expected {total_slides} slides, but generated {len(slides)}. Please try again.</p>
|
633 |
</div>
|
634 |
""",
|
635 |
+
[],
|
636 |
+
"",
|
637 |
[]
|
638 |
)
|
639 |
return
|
|
|
647 |
<p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
|
648 |
</div>
|
649 |
""",
|
650 |
+
[],
|
651 |
+
"",
|
652 |
[]
|
653 |
)
|
654 |
return
|
|
|
662 |
<p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
|
663 |
</div>
|
664 |
""",
|
665 |
+
[],
|
666 |
+
"",
|
667 |
[]
|
668 |
)
|
669 |
return
|
|
|
678 |
<p style="margin-top: 20px;">Please try again.</p>
|
679 |
</div>
|
680 |
""",
|
681 |
+
[],
|
682 |
+
"",
|
683 |
[]
|
684 |
)
|
685 |
return
|
686 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
687 |
# Collect .txt files for download
|
688 |
txt_files = [f for f in os.listdir(OUTPUT_DIR) if f.endswith('.txt')]
|
689 |
txt_files.sort() # Sort for consistent display
|
690 |
txt_file_paths = [os.path.join(OUTPUT_DIR, f) for f in txt_files]
|
691 |
|
692 |
+
# Initialize audio timeline placeholders
|
693 |
+
audio_urls = [None] * len(scripts)
|
694 |
audio_timeline = ""
|
695 |
+
for i in range(len(scripts)):
|
696 |
+
audio_timeline += f'<audio id="audio-{i+1}" controls style="display: inline-block; margin: 0 10px; width: 200px;"><source src="" type="audio/mpeg"></audio>'
|
|
|
|
|
|
|
697 |
|
698 |
+
# Display lecture materials immediately
|
699 |
+
slides_json = json.dumps({"slides": markdown_slides, "audioFiles": audio_urls})
|
700 |
+
html_controls = f"""
|
|
|
701 |
<div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
|
|
|
|
|
|
|
702 |
<div style="padding: 20px; text-align: center;">
|
703 |
+
<div id="audio-timeline" style="display: flex; justify-content: center; margin-bottom: 10px;">
|
704 |
{audio_timeline}
|
705 |
</div>
|
706 |
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
707 |
<button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
|
708 |
<button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
|
709 |
<button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
|
710 |
+
<button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">🖥️</button>
|
711 |
</div>
|
712 |
</div>
|
713 |
</div>
|
714 |
<script>
|
715 |
+
const lectureData = {slides_json};
|
716 |
let currentSlide = 0;
|
717 |
const totalSlides = lectureData.slides.length;
|
718 |
let audioElements = [];
|
|
|
723 |
audioElements.push(audio);
|
724 |
}}
|
725 |
|
726 |
+
function updateSlideDisplay() {{
|
727 |
+
window.updateSlideContent(lectureData.slides[currentSlide]);
|
728 |
+
audioElements.forEach((audio, index) => {{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
729 |
if (audio && audio.pause) {{
|
730 |
audio.pause();
|
731 |
audio.currentTime = 0;
|
732 |
+
if (index === currentSlide && audio.src) {{
|
733 |
+
audio.play().catch(e => console.error('Audio play failed:', e));
|
734 |
+
}}
|
735 |
}}
|
736 |
}});
|
737 |
}}
|
|
|
739 |
function prevSlide() {{
|
740 |
if (currentSlide > 0) {{
|
741 |
currentSlide--;
|
742 |
+
updateSlideDisplay();
|
743 |
}}
|
744 |
}}
|
745 |
|
746 |
function nextSlide() {{
|
747 |
if (currentSlide < totalSlides - 1) {{
|
748 |
currentSlide++;
|
749 |
+
updateSlideDisplay();
|
750 |
}}
|
751 |
}}
|
752 |
|
753 |
function playAll() {{
|
754 |
+
let index = currentSlide;
|
755 |
function playNext() {{
|
756 |
if (index >= totalSlides) return;
|
757 |
+
currentSlide = index;
|
758 |
+
updateSlideDisplay();
|
759 |
const audio = audioElements[index];
|
760 |
+
if (audio && audio.src) {{
|
761 |
audio.play().then(() => {{
|
762 |
audio.addEventListener('ended', () => {{
|
763 |
index++;
|
|
|
776 |
playNext();
|
777 |
}}
|
778 |
|
779 |
+
function toggleFullScreen() {{
|
780 |
+
const container = document.getElementById('lecture-container');
|
781 |
+
if (!document.fullscreenElement) {{
|
782 |
+
container.requestFullscreen().catch(err => {{
|
783 |
+
console.error(`Error attempting to enable full-screen mode: ${{err.message}}`);
|
784 |
+
}});
|
785 |
+
}} else {{
|
786 |
+
document.exitFullscreen();
|
787 |
+
}}
|
788 |
+
}}
|
789 |
+
|
790 |
// Attach event listeners
|
791 |
document.getElementById('prev-btn').addEventListener('click', prevSlide);
|
792 |
document.getElementById('play-btn').addEventListener('click', playAll);
|
793 |
document.getElementById('next-btn').addEventListener('click', nextSlide);
|
794 |
+
document.getElementById('fullscreen-btn').addEventListener('click', toggleFullScreen);
|
795 |
|
796 |
// Initialize first slide
|
797 |
+
updateSlideDisplay();
|
798 |
</script>
|
799 |
"""
|
|
|
800 |
yield (
|
801 |
+
html_controls,
|
802 |
+
txt_file_paths,
|
803 |
+
markdown_slides[0],
|
804 |
+
[]
|
805 |
)
|
806 |
+
|
807 |
+
# Audio generation
|
808 |
+
audio_files = []
|
809 |
+
validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
|
810 |
+
if not validated_speaker_wav:
|
811 |
+
logger.error("Invalid speaker audio after conversion, skipping TTS")
|
812 |
+
yield (
|
813 |
+
html_controls,
|
814 |
+
txt_file_paths,
|
815 |
+
markdown_slides[0],
|
816 |
+
[]
|
817 |
+
)
|
818 |
+
return
|
819 |
+
|
820 |
+
for i, script in enumerate(scripts):
|
821 |
+
cleaned_script = clean_script_text(script)
|
822 |
+
audio_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}.mp3")
|
823 |
+
script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_script.txt")
|
824 |
+
|
825 |
+
try:
|
826 |
+
with open(script_file, "w", encoding="utf-8") as f:
|
827 |
+
f.write(cleaned_script or "")
|
828 |
+
logger.info("Saved script to %s: %s", script_file, cleaned_script)
|
829 |
+
except Exception as e:
|
830 |
+
logger.error("Error saving script to %s: %s", script_file, str(e))
|
831 |
+
|
832 |
+
if not cleaned_script:
|
833 |
+
logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
|
834 |
+
audio_files.append(None)
|
835 |
+
audio_urls[i] = None
|
836 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10
|
837 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
838 |
+
yield (
|
839 |
+
html_controls,
|
840 |
+
txt_file_paths,
|
841 |
+
markdown_slides[currentSlide if 'currentSlide' in locals() else 0],
|
842 |
+
[]
|
843 |
+
)
|
844 |
+
await asyncio.sleep(0.1)
|
845 |
+
continue
|
846 |
+
|
847 |
+
max_audio_retries = 2
|
848 |
+
for attempt in range(max_audio_retries + 1):
|
849 |
+
try:
|
850 |
+
current_text = cleaned_script
|
851 |
+
if attempt > 0:
|
852 |
+
sentences = re.split(r"[.!?]+", cleaned_script)
|
853 |
+
sentences = [s.strip() for s in sentences if s.strip()][:2]
|
854 |
+
current_text = ". ".join(sentences) + "."
|
855 |
+
logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
|
856 |
+
|
857 |
+
success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
|
858 |
+
if not success:
|
859 |
+
raise RuntimeError("TTS generation failed")
|
860 |
+
|
861 |
+
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
862 |
+
audio_files.append(audio_file)
|
863 |
+
audio_urls[i] = f"/gradio_api/file={audio_file}"
|
864 |
+
# Update the audio element's src
|
865 |
+
audio_timeline = ""
|
866 |
+
for j, url in enumerate(audio_urls):
|
867 |
+
if url:
|
868 |
+
audio_timeline += f'<audio id="audio-{j+1}" controls src="{url}" style="display: inline-block; margin: 0 10px; width: 200px;"></audio>'
|
869 |
+
else:
|
870 |
+
audio_timeline += f'<audio id="audio-{j+1}" controls style="display: inline-block; margin: 0 10px; width: 200px;"><source src="" type="audio/mpeg"></audio>'
|
871 |
+
html_controls = f"""
|
872 |
+
<div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
|
873 |
+
<div style="padding: 20px; text-align: center;">
|
874 |
+
<div id="audio-timeline" style="display: flex; justify-content: center; margin-bottom: 10px;">
|
875 |
+
{audio_timeline}
|
876 |
+
</div>
|
877 |
+
<div style="display: flex; justify-content: center; margin-bottom: 10px;">
|
878 |
+
<button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
|
879 |
+
<button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
|
880 |
+
<button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
|
881 |
+
<button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">🖥️</button>
|
882 |
+
</div>
|
883 |
+
</div>
|
884 |
+
</div>
|
885 |
+
<script>
|
886 |
+
const lectureData = {json.dumps({"slides": markdown_slides, "audioFiles": audio_urls})};
|
887 |
+
let currentSlide = {currentSlide if 'currentSlide' in locals() else 0};
|
888 |
+
const totalSlides = lectureData.slides.length;
|
889 |
+
let audioElements = [];
|
890 |
+
|
891 |
+
// Populate audio elements
|
892 |
+
for (let i = 0; i < totalSlides; i++) {{
|
893 |
+
const audio = document.getElementById(`audio-${{i+1}}`);
|
894 |
+
audioElements.push(audio);
|
895 |
+
}}
|
896 |
+
|
897 |
+
function updateSlideDisplay() {{
|
898 |
+
window.updateSlideContent(lectureData.slides[currentSlide]);
|
899 |
+
audioElements.forEach((audio, index) => {{
|
900 |
+
if (audio && audio.pause) {{
|
901 |
+
audio.pause();
|
902 |
+
audio.currentTime = 0;
|
903 |
+
if (index === currentSlide && audio.src) {{
|
904 |
+
audio.play().catch(e => console.error('Audio play failed:', e));
|
905 |
+
}}
|
906 |
+
}}
|
907 |
+
}});
|
908 |
+
}}
|
909 |
+
|
910 |
+
function prevSlide() {{
|
911 |
+
if (currentSlide > 0) {{
|
912 |
+
currentSlide--;
|
913 |
+
updateSlideDisplay();
|
914 |
+
}}
|
915 |
+
}}
|
916 |
+
|
917 |
+
function nextSlide() {{
|
918 |
+
if (currentSlide < totalSlides - 1) {{
|
919 |
+
currentSlide++;
|
920 |
+
updateSlideDisplay();
|
921 |
+
}}
|
922 |
+
}}
|
923 |
+
|
924 |
+
function playAll() {{
|
925 |
+
let index = currentSlide;
|
926 |
+
function playNext() {{
|
927 |
+
if (index >= totalSlides) return;
|
928 |
+
currentSlide = index;
|
929 |
+
updateSlideDisplay();
|
930 |
+
const audio = audioElements[index];
|
931 |
+
if (audio && audio.src) {{
|
932 |
+
audio.play().then(() => {{
|
933 |
+
audio.addEventListener('ended', () => {{
|
934 |
+
index++;
|
935 |
+
playNext();
|
936 |
+
}}, {{ once: true }});
|
937 |
+
}}).catch(e => {{
|
938 |
+
console.error('Audio play failed:', e);
|
939 |
+
index++;
|
940 |
+
playNext();
|
941 |
+
}});
|
942 |
+
}} else {{
|
943 |
+
index++;
|
944 |
+
playNext();
|
945 |
+
}}
|
946 |
+
}}
|
947 |
+
playNext();
|
948 |
+
}}
|
949 |
+
|
950 |
+
function toggleFullScreen() {{
|
951 |
+
const container = document.getElementById('lecture-container');
|
952 |
+
if (!document.fullscreenElement) {{
|
953 |
+
container.requestFullscreen().catch(err => {{
|
954 |
+
console.error(`Error attempting to enable full-screen mode: ${{err.message}}`);
|
955 |
+
}});
|
956 |
+
}} else {{
|
957 |
+
document.exitFullscreen();
|
958 |
+
}}
|
959 |
+
}}
|
960 |
+
|
961 |
+
// Attach event listeners
|
962 |
+
document.getElementById('prev-btn').addEventListener('click', prevSlide);
|
963 |
+
document.getElementById('play-btn').addEventListener('click', playAll);
|
964 |
+
document.getElementById('next-btn').addEventListener('click', nextSlide);
|
965 |
+
document.getElementById('fullscreen-btn').addEventListener('click', toggleFullScreen);
|
966 |
+
|
967 |
+
// Initialize first slide
|
968 |
+
updateSlideDisplay();
|
969 |
+
</script>
|
970 |
+
"""
|
971 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10
|
972 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
973 |
+
yield (
|
974 |
+
html_controls,
|
975 |
+
txt_file_paths,
|
976 |
+
markdown_slides[currentSlide if 'currentSlide' in locals() else 0],
|
977 |
+
[]
|
978 |
+
)
|
979 |
+
await asyncio.sleep(0.1)
|
980 |
+
break
|
981 |
+
except Exception as e:
|
982 |
+
logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
|
983 |
+
if attempt == max_audio_retries:
|
984 |
+
logger.error("Max retries reached for slide %d, skipping", i + 1)
|
985 |
+
audio_files.append(None)
|
986 |
+
audio_urls[i] = None
|
987 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10
|
988 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
989 |
+
yield (
|
990 |
+
html_controls,
|
991 |
+
txt_file_paths,
|
992 |
+
markdown_slides[currentSlide if 'currentSlide' in locals() else 0],
|
993 |
+
[]
|
994 |
+
)
|
995 |
+
await asyncio.sleep(0.1)
|
996 |
+
break
|
997 |
+
|
998 |
+
logger.info("Lecture generation completed successfully")
|
999 |
|
1000 |
except Exception as e:
|
1001 |
logger.error("Error during lecture generation: %s\n%s", str(e), traceback.format_exc())
|
|
|
1007 |
<p style="margin-top: 20px;">Please try again or adjust your inputs.</p>
|
1008 |
</div>
|
1009 |
""",
|
1010 |
+
[],
|
1011 |
+
"",
|
1012 |
[]
|
1013 |
)
|
1014 |
return
|
|
|
1044 |
<p style="margin-top: 10px; font-size: 16px;">Please Generate lecture content via the form on the left first before lecture begins</p>
|
1045 |
</div>
|
1046 |
"""
|
1047 |
+
slide_display = gr.Markdown(label="Lecture Slides", value="Waiting for lecture content...")
|
1048 |
+
controls_display = gr.HTML(label="Controls", value=default_slide_html)
|
1049 |
file_output = gr.File(label="Download Generated Files")
|
1050 |
|
1051 |
speaker_audio.change(
|
|
|
1054 |
outputs=speaker_audio
|
1055 |
)
|
1056 |
|
1057 |
+
# JavaScript to update slide content dynamically
|
1058 |
+
demo.load(
|
1059 |
+
fn=None,
|
1060 |
+
inputs=None,
|
1061 |
+
outputs=None,
|
1062 |
+
_js="""
|
1063 |
+
() => {
|
1064 |
+
window.updateSlideContent = (content) => {
|
1065 |
+
document.querySelector('#slide-display textarea').value = content;
|
1066 |
+
document.querySelector('#slide-display').dispatchEvent(new Event('input'));
|
1067 |
+
};
|
1068 |
+
}
|
1069 |
+
"""
|
1070 |
+
)
|
1071 |
+
|
1072 |
generate_btn.click(
|
1073 |
fn=on_generate,
|
1074 |
inputs=[api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, speaker_audio, num_slides],
|
1075 |
+
outputs=[controls_display, file_output, slide_display, gr.State()]
|
1076 |
)
|
1077 |
|
1078 |
if __name__ == "__main__":
|
1079 |
+
demo.launch(allowed_paths=[OUTPUT_DIR])
|