Jaward committed on
Commit
89a534d
·
verified ·
1 Parent(s): b622114

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +466 -426
app.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import os
2
  import json
3
  import re
@@ -6,8 +10,6 @@ import asyncio
6
  import logging
7
  import torch
8
  import random
9
- import tempfile
10
- import zipfile
11
  from serpapi import GoogleSearch
12
  from pydantic import BaseModel
13
  from autogen_agentchat.agents import AssistantAgent
@@ -21,10 +23,9 @@ from autogen_ext.models.ollama import OllamaChatCompletionClient
21
  from markdown_pdf import MarkdownPdf, Section
22
  import traceback
23
  import soundfile as sf
24
- import shutil
25
  from pydub import AudioSegment
26
  from TTS.api import TTS
27
- from gradio_pdf import PDF
28
 
29
  # Set up logging
30
  logging.basicConfig(
@@ -37,8 +38,11 @@ logging.basicConfig(
37
  )
38
  logger = logging.getLogger(__name__)
39
 
40
- # Set up environment
 
 
41
  os.environ["COQUI_TOS_AGREED"] = "1"
 
42
 
43
  # Define Pydantic model for slide data
44
  class Slide(BaseModel):
@@ -83,6 +87,17 @@ def search_web(query: str, serpapi_key: str) -> str:
83
  logger.error("Unexpected error during search: %s", str(e))
84
  return f"Unexpected error during search: {str(e)}"
85
 
 
 
 
 
 
 
 
 
 
 
 
86
  # Function to get model client based on selected service
87
  def get_model_client(service, api_key):
88
  if service == "OpenAI-gpt-4o-2024-08-06":
@@ -129,7 +144,7 @@ def clean_script_text(script):
129
  return script
130
 
131
  # Helper function to validate and convert speaker audio (MP3 or WAV)
132
- async def validate_and_convert_speaker_audio(speaker_audio, temp_dir):
133
  if not os.path.exists(speaker_audio):
134
  logger.error("Speaker audio file does not exist: %s", speaker_audio)
135
  return None
@@ -142,8 +157,9 @@ async def validate_and_convert_speaker_audio(speaker_audio, temp_dir):
142
  audio = AudioSegment.from_mp3(speaker_audio)
143
  # Convert to mono, 22050 Hz
144
  audio = audio.set_channels(1).set_frame_rate(22050)
145
- speaker_wav = os.path.join(temp_dir, "speaker_converted.wav")
146
- audio.export(speaker_wav, format="wav")
 
147
  elif ext == ".wav":
148
  speaker_wav = speaker_audio
149
  else:
@@ -161,9 +177,9 @@ async def validate_and_convert_speaker_audio(speaker_audio, temp_dir):
161
  if data.ndim == 2:
162
  logger.info("Converting stereo WAV to mono: %s", speaker_wav)
163
  data = data.mean(axis=1)
164
- mono_wav = os.path.join(temp_dir, "speaker_mono.wav")
165
- sf.write(mono_wav, data, samplerate)
166
- speaker_wav = mono_wav
167
 
168
  logger.info("Validated speaker audio: %s", speaker_wav)
169
  return speaker_wav
@@ -198,7 +214,9 @@ def extract_json_from_message(message):
198
  match = re.search(pattern, content, re.DOTALL)
199
  if match:
200
  try:
201
- return json.loads(match.group(1))
 
 
202
  except json.JSONDecodeError as e:
203
  logger.error("Failed to parse JSON from TextMessage: %s, Content: %s", e, content)
204
  # Fallback: Try raw JSON array
@@ -206,7 +224,9 @@ def extract_json_from_message(message):
206
  match = re.search(json_pattern, content, re.DOTALL)
207
  if match:
208
  try:
209
- return json.loads(match.group(0))
 
 
210
  except json.JSONDecodeError as e:
211
  logger.error("Failed to parse fallback JSON from TextMessage: %s, Content: %s", e, content)
212
  # Fallback: Try any JSON-like structure
@@ -243,14 +263,18 @@ def extract_json_from_message(message):
243
  match = re.search(pattern, content, re.DOTALL)
244
  if match:
245
  try:
246
- return json.loads(match.group(1))
 
 
247
  except json.JSONDecodeError as e:
248
  logger.error("Failed to parse JSON from HandoffMessage context: %s, Content: %s", e, content)
249
  json_pattern = r"\[\s*\{.*?\}\s*\]"
250
  match = re.search(json_pattern, content, re.DOTALL)
251
  if match:
252
  try:
253
- return json.loads(match.group(0))
 
 
254
  except json.JSONDecodeError as e:
255
  logger.error("Failed to parse fallback JSON from HandoffMessage context: %s, Content: %s", e, content)
256
  try:
@@ -269,7 +293,7 @@ def extract_json_from_message(message):
269
  return None
270
 
271
  # Function to generate Markdown and convert to PDF (portrait, centered)
272
- def generate_slides_pdf(slides, temp_dir):
273
  pdf = MarkdownPdf()
274
 
275
  for slide in slides:
@@ -288,48 +312,14 @@ def generate_slides_pdf(slides, temp_dir):
288
  """
289
  pdf.add_section(Section(markdown_content, toc=False))
290
 
291
- pdf_file = os.path.join(temp_dir, "slides.pdf")
292
- pdf.save(pdf_file)
293
-
294
- logger.info("Generated PDF slides (portrait): %s", pdf_file)
295
- return pdf_file
296
-
297
- # Helper function to create ZIP file of outputs
298
- def create_outputs_zip(temp_dir, slides, audio_files, scripts):
299
- zip_path = os.path.join(temp_dir, "lecture_outputs.zip")
300
- with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
301
- # Add slides PDF
302
- pdf_file = os.path.join(temp_dir, "slides.pdf")
303
- if os.path.exists(pdf_file):
304
- zipf.write(pdf_file, "slides.pdf")
305
-
306
- # Add audio files
307
- for i, audio_file in enumerate(audio_files):
308
- if audio_file and os.path.exists(audio_file):
309
- zipf.write(audio_file, f"slide_{i+1}.wav")
310
-
311
- # Add raw and cleaned scripts
312
- for i in range(len(slides)):
313
- raw_script_file = os.path.join(temp_dir, f"slide_{i+1}_raw_script.txt")
314
- cleaned_script_file = os.path.join(temp_dir, f"slide_{i+1}_script.txt")
315
- if os.path.exists(raw_script_file):
316
- zipf.write(raw_script_file, f"slide_{i+1}_raw_script.txt")
317
- if os.path.exists(cleaned_script_file):
318
- zipf.write(cleaned_script_file, f"slide_{i+1}_script.txt")
319
-
320
- logger.info("Created ZIP file: %s", zip_path)
321
- return zip_path
322
-
323
- # Helper function for progress HTML
324
- def html_with_progress(label, progress):
325
- return f"""
326
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
327
- <div style="width: 100%; background-color: #FFFFFF; border-radius: 10px; overflow: hidden; margin-bottom: 20px;">
328
- <div style="width: {progress}%; height: 30px; background-color: #4CAF50; border-radius: 10px;"></div>
329
- </div>
330
- <h2 style="font-style: italic; color: #555;">{label}</h2>
331
- </div>
332
- """
333
 
334
  # Async function to update audio preview
335
  async def update_audio_preview(audio_file):
@@ -341,37 +331,50 @@ async def update_audio_preview(audio_file):
341
  # Async function to generate lecture materials and audio
342
  async def on_generate(api_service, api_key, serpapi_key, title, topic, instructions, lecture_type, speaker_audio, num_slides):
343
  if not serpapi_key:
344
- yield html_with_progress("SerpApi key required. Please provide a valid key.", 0)
 
 
 
 
 
345
  return
346
 
347
- # Create temporary directory
348
- with tempfile.TemporaryDirectory() as temp_dir:
349
- # Initialize TTS model
350
- tts = None
351
- try:
352
- device = "cuda" if torch.cuda.is_available() else "cpu"
353
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
354
- logger.info("TTS model initialized on %s", device)
355
- except Exception as e:
356
- logger.error("Failed to initialize TTS model: %s", str(e))
357
- yield html_with_progress(f"TTS model initialization failed: {str(e)}", 0)
358
- return
359
 
360
- model_client = get_model_client(api_service, api_key)
361
-
362
- research_agent = AssistantAgent(
363
- name="research_agent",
364
- model_client=model_client,
365
- handoffs=["slide_agent"],
366
- system_message="You are a Research Agent. Use the search_web tool to gather information on the topic and keywords from the initial message. Summarize the findings concisely in a single message, then use the handoff_to_slide_agent tool to pass the task to the Slide Agent. Do not produce any other output.",
367
- tools=[search_web]
368
- )
369
- slide_agent = AssistantAgent(
370
- name="slide_agent",
371
- model_client=model_client,
372
- handoffs=["script_agent"],
373
- system_message=f"""
374
- You are a Slide Agent. Using the research from the conversation history, generate EXACTLY {num_slides} content slides on the topic, plus 1 quiz slide, 1 assignment slide, and 1 thank-you slide, for a TOTAL of {num_slides + 3} slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, with each slide as an object with 'title' and 'content' keys. Ensure the JSON is valid and contains precisely {num_slides + 3} slides. If the slide count is incorrect, adjust the output to meet this requirement before proceeding. Do not include explanatory text or comments. After outputting the JSON, use the handoff_to_script_agent tool.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
  Example for 2 content slides:
376
  ```json
377
  [
@@ -382,14 +385,14 @@ Example for 2 content slides:
382
  {{"title": "Thank You", "content": "Thank you message"}}
383
  ]
384
  ```""",
385
- output_content_type=None,
386
- reflect_on_tool_use=False
387
- )
388
- script_agent = AssistantAgent(
389
- name="script_agent",
390
- model_client=model_client,
391
- handoffs=["feynman_agent"],
392
- system_message=f"""
393
  You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {num_slides + 3} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
394
  Example for 1 content slide:
395
  ```json
@@ -400,373 +403,412 @@ Example for 1 content slide:
400
  "Thanks for, um, attending today!"
401
  ]
402
  ```""",
403
- output_content_type=None,
404
- reflect_on_tool_use=False
405
- )
406
- feynman_agent = AssistantAgent(
407
- name="feynman_agent",
408
- model_client=model_client,
409
- handoffs=[],
410
- system_message=f"""
411
- You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {num_slides + 3} slides and {num_slides + 3} scripts are received. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing, invalid, or do not match the expected count ({num_slides + 3}), report the issue clearly. Use 'TERMINATE' to signal completion.
412
  Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture is coherent. TERMINATE'
413
  """)
414
-
415
- swarm = Swarm(
416
- participants=[research_agent, slide_agent, script_agent, feynman_agent],
417
- termination_condition=HandoffTermination(target="user") | TextMentionTermination("TERMINATE")
418
- )
419
-
420
- progress = 0
421
- label = "Research: in progress..."
422
- yield html_with_progress(label, progress)
423
- await asyncio.sleep(0.1)
424
-
425
- initial_message = f"""
426
- Lecture Title: {title}
427
- Topic: {topic}
428
- Additional Instructions: {instructions}
429
- Audience: {lecture_type}
430
- Number of Content Slides: {num_slides}
431
- Please start by researching the topic.
432
- """
433
- logger.info("Starting lecture generation for topic: %s", topic)
434
-
435
- slides = None
436
- scripts = None
 
 
 
 
 
 
 
 
437
  max_slide_retries = 2
438
  slide_retry_count = 0
439
 
440
  while slide_retry_count <= max_slide_retries:
441
- try:
442
- logger.info("Research Agent starting (Slide attempt %d/%d)", slide_retry_count + 1, max_slide_retries)
443
- task_result = await Console(swarm.run_stream(task=initial_message))
444
- logger.info("Swarm execution completed")
445
-
446
- script_retry_count = 0
447
- max_script_retries = 2
 
 
 
448
 
449
- for message in task_result.messages:
450
- source = getattr(message, 'source', getattr(message, 'sender', None))
451
- logger.debug("Processing message from %s, type: %s, content: %s", source, type(message), message.to_text() if hasattr(message, 'to_text') else str(message))
452
-
453
- if isinstance(message, HandoffMessage):
454
- logger.info("Handoff from %s to %s, Context: %s", source, message.target, message.context)
455
- if source == "research_agent" and message.target == "slide_agent":
456
- progress = 25
457
- label = "Slides: generating..."
458
- yield html_with_progress(label, progress)
459
- await asyncio.sleep(0.1)
460
- elif source == "slide_agent" and message.target == "script_agent":
461
- if slides is None:
462
- logger.warning("Slide Agent handoff without slides JSON")
463
- extracted_json = extract_json_from_message(message)
464
- if extracted_json:
465
- slides = extracted_json
466
- logger.info("Extracted slides JSON from HandoffMessage context: %s", slides)
467
- if slides is None:
468
- label = "Slides: failed to generate..."
469
- yield html_with_progress(label, progress)
470
- await asyncio.sleep(0.1)
471
- progress = 50
472
- label = "Scripts: generating..."
473
- yield html_with_progress(label, progress)
474
- await asyncio.sleep(0.1)
475
- elif source == "script_agent" and message.target == "feynman_agent":
476
- if scripts is None:
477
- logger.warning("Script Agent handoff without scripts JSON")
478
- extracted_json = extract_json_from_message(message)
479
- if extracted_json:
480
- scripts = extracted_json
481
- logger.info("Extracted scripts JSON from HandoffMessage context: %s", scripts)
482
- progress = 75
483
- label = "Review: in progress..."
484
- yield html_with_progress(label, progress)
485
- await asyncio.sleep(0.1)
486
-
487
- elif source == "research_agent" and isinstance(message, TextMessage) and "handoff_to_slide_agent" in message.content:
488
- logger.info("Research Agent completed research")
489
  progress = 25
490
  label = "Slides: generating..."
491
  yield html_with_progress(label, progress)
492
  await asyncio.sleep(0.1)
493
-
494
- elif source == "slide_agent" and isinstance(message, (TextMessage, StructuredMessage)):
495
- logger.debug("Slide Agent message received: %s", message.to_text())
496
- extracted_json = extract_json_from_message(message)
497
- if extracted_json:
498
- slides = extracted_json
499
- logger.info("Slide Agent generated %d slides: %s", len(slides), slides)
500
- expected_slide_count = num_slides + 3
501
- if len(slides) != expected_slide_count:
502
- logger.warning("Generated %d slides, expected %d. Retrying...", len(slides), expected_slide_count)
503
- slide_retry_count += 1
504
- if slide_retry_count <= max_slide_retries:
505
- # Re-prompt slide agent
506
- retry_message = TextMessage(
507
- content=f"Please generate EXACTLY {num_slides} content slides plus 1 quiz, 1 assignment, and 1 thank-you slide (total {num_slides + 3}).",
508
- source="user",
509
- recipient="slide_agent"
510
- )
511
- task_result.messages.append(retry_message)
512
- slides = None
513
- continue
514
- else:
515
- yield html_with_progress(f"Failed to generate correct number of slides after {max_slide_retries} retries. Expected {expected_slide_count}, got {len(slides)}.", progress)
516
- return
517
- # Save slide content to individual files
518
- for i, slide in enumerate(slides):
519
- content_file = os.path.join(temp_dir, f"slide_{i+1}_content.txt")
520
- try:
521
- with open(content_file, "w", encoding="utf-8") as f:
522
- f.write(slide["content"])
523
- logger.info("Saved slide content to %s: %s", content_file, slide["content"])
524
- except Exception as e:
525
- logger.error("Error saving slide content to %s: %s", content_file, str(e))
526
- progress = 50
527
- label = "Scripts: generating..."
528
  yield html_with_progress(label, progress)
529
  await asyncio.sleep(0.1)
530
- else:
531
- logger.warning("No JSON extracted from slide_agent message: %s", message.to_text())
532
-
533
- elif source == "script_agent" and isinstance(message, (TextMessage, StructuredMessage)):
534
- logger.debug("Script Agent message received: %s", message.to_text())
535
- extracted_json = extract_json_from_message(message)
536
- if extracted_json:
537
- scripts = extracted_json
538
- logger.info("Script Agent generated scripts for %d slides: %s", len(scripts), scripts)
539
- # Save raw scripts to individual files
540
- for i, script in enumerate(scripts):
541
- script_file = os.path.join(temp_dir, f"slide_{i+1}_raw_script.txt")
542
- try:
543
- with open(script_file, "w", encoding="utf-8") as f:
544
- f.write(script)
545
- logger.info("Saved raw script to %s: %s", script_file, script)
546
- except Exception as e:
547
- logger.error("Error saving raw script to %s: %s", script_file, str(e))
548
- progress = 75
549
- label = "Scripts generated and saved. Reviewing..."
550
- yield html_with_progress(label, progress)
551
- await asyncio.sleep(0.1)
552
- else:
553
- logger.warning("No JSON extracted from script_agent message: %s", message.to_text())
554
- if script_retry_count < max_script_retries:
555
- script_retry_count += 1
556
- logger.info("Retrying script generation (attempt %d/%d)", script_retry_count, max_script_retries)
557
- # Re-prompt script agent
558
- retry_message = TextMessage(
559
- content="Please generate scripts for the slides as per your instructions.",
560
- source="user",
561
- recipient="script_agent"
562
- )
563
- task_result.messages.append(retry_message)
564
- continue
565
-
566
- elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
567
- logger.info("Feynman Agent completed lecture review: %s", message.content)
568
- progress = 90
569
- label = "Lecture materials ready. Generating audio..."
570
  yield html_with_progress(label, progress)
571
  await asyncio.sleep(0.1)
572
 
573
- logger.info("Slides state: %s", "Generated" if slides else "None")
574
- logger.info("Scripts state: %s", "Generated" if scripts else "None")
575
- if not slides or not scripts:
576
- error_message = f"Failed to generate {'slides and scripts' if not slides and not scripts else 'slides' if not slides else 'scripts'}"
577
- error_message += f". Received {len(slides) if slides else 0} slides and {len(scripts) if scripts else 0} scripts."
578
- logger.error("%s", error_message)
579
- yield html_with_progress(error_message, progress)
580
- return
581
-
582
- expected_slide_count = num_slides + 3
583
- if len(slides) != expected_slide_count:
584
- logger.error("Final validation failed: Expected %d slides, received %d", expected_slide_count, len(slides))
585
- yield html_with_progress(f"Incorrect number of slides. Expected {expected_slide_count}, got {len(slides)}.", progress)
586
- return
587
-
588
- if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
589
- logger.error("Scripts are not a list of strings: %s", scripts)
590
- yield html_with_progress("Invalid script format. Scripts must be a list of strings.", progress)
591
- return
592
 
593
- if len(scripts) != expected_slide_count:
594
- logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
595
- yield html_with_progress(f"Mismatch in slides and scripts. Generated {len(slides)} slides but {len(scripts)} scripts.", progress)
596
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
597
 
598
- # Generate PDF from slides
599
- pdf_file = generate_slides_pdf(slides, temp_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
 
601
- audio_files = []
602
- speaker_audio = speaker_audio if speaker_audio else "feynman.mp3"
603
- validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio, temp_dir)
604
- if not validated_speaker_wav:
605
- logger.error("Invalid speaker audio after conversion, skipping TTS")
606
- yield html_with_progress("Invalid speaker audio. Please upload a valid MP3 or WAV file.", progress)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
  return
608
-
609
- # Process audio generation sequentially with retries
610
- for i, script in enumerate(scripts):
611
- cleaned_script = clean_script_text(script)
612
- audio_file = os.path.join(temp_dir, f"slide_{i+1}.wav")
613
- script_file = os.path.join(temp_dir, f"slide_{i+1}_script.txt")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
614
 
615
- # Save cleaned script
616
- try:
617
- with open(script_file, "w", encoding="utf-8") as f:
618
- f.write(cleaned_script or "")
619
- logger.info("Saved cleaned script to %s: %s", script_file, cleaned_script)
620
- except Exception as e:
621
- logger.error("Error saving cleaned script to %s: %s", script_file, str(e))
622
 
623
- if not cleaned_script:
624
- logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
 
 
 
 
 
 
 
 
 
625
  audio_files.append(None)
626
  progress = 90 + ((i + 1) / len(scripts)) * 10
627
  label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
628
  yield html_with_progress(label, progress)
629
  await asyncio.sleep(0.1)
630
- continue
631
-
632
- max_retries = 2
633
- for attempt in range(max_retries + 1):
634
- try:
635
- current_text = cleaned_script
636
- if attempt > 0:
637
- sentences = re.split(r"[.!?]+", cleaned_script)
638
- sentences = [s.strip() for s in sentences if s.strip()][:2]
639
- current_text = ". ".join(sentences) + "."
640
- logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
641
-
642
- success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
643
- if not success:
644
- raise RuntimeError("TTS generation failed")
645
-
646
- logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
647
- audio_files.append(audio_file)
648
- progress = 90 + ((i + 1) / len(scripts)) * 10
649
- label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
650
- yield html_with_progress(label, progress)
651
- await asyncio.sleep(0.1)
652
- break
653
- except Exception as e:
654
- logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
655
- if attempt == max_retries:
656
- logger.error("Max retries reached for slide %d, skipping", i + 1)
657
- audio_files.append(None)
658
- progress = 90 + ((i + 1) / len(scripts)) * 10
659
- label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
660
- yield html_with_progress(label, progress)
661
- await asyncio.sleep(0.1)
662
- break
663
-
664
- # Create ZIP file of all outputs
665
- zip_path = create_outputs_zip(temp_dir, slides, audio_files, scripts)
666
-
667
- # Prepare UI output
668
- slides_info = json.dumps({"slides": [
669
- {"title": slide["title"], "content": slide["content"]}
670
- for slide in slides
671
- ], "audioFiles": audio_files})
672
-
673
- html_output = f"""
674
- <div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
675
- <div id="slide-content" style="flex: 1; overflow: auto;">
676
- <div id="pdf-viewer"></div>
677
- </div>
678
- <div style="padding: 20px;">
679
- <div id="progress-bar" style="width: 100%; height: 5px; background-color: #ddd; border-radius: 2px; margin-bottom: 10px;">
680
- <div id="progress-fill" style="width: {(1/len(slides)*100)}%; height: 100%; background-color: #4CAF50; border-radius: 2px;"></div>
681
- </div>
682
- <div style="display: flex; justify-content: center; margin-bottom: 10px;">
683
- <button onclick="prevSlide()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
684
- <button onclick="togglePlay()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
685
- <button onclick="nextSlide()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
686
- </div>
687
- <p id="slide-counter" style="text-align: center;">Slide 1 of {len(slides)}</p>
688
- </div>
689
  </div>
690
- <script>
691
- const lectureData = {slides_info};
692
- let currentSlide = 0;
693
- const totalSlides = lectureData.slides.length;
694
- const slideCounter = document.getElementById('slide-counter');
695
- const progressFill = document.getElementById('progress-fill');
696
- let audioElements = [];
697
- let currentAudio = null;
 
 
 
698
 
699
- for (let i = 0; i < totalSlides; i++) {{
700
- if (lectureData.audioFiles && lectureData.audioFiles[i]) {{
701
- const audio = new Audio('file://' + lectureData.audioFiles[i]);
702
- audioElements.push(audio);
703
- }} else {{
704
- audioElements.push(null);
705
- }}
706
- }}
707
 
708
- function updateSlide() {{
709
- slideCounter.textContent = `Slide ${{currentSlide + 1}} of ${{totalSlides}}`;
710
- progressFill.style.width = `${{(currentSlide + 1) / totalSlides * 100}}%`;
711
 
712
- if (currentAudio) {{
713
- currentAudio.pause();
714
- currentAudio.currentTime = 0;
715
- }}
716
 
717
- if (audioElements[currentSlide]) {{
718
- currentAudio = audioElements[currentSlide];
719
- currentAudio.play().catch(e => console.error('Audio play failed:', e));
720
- }} else {{
721
- currentAudio = null;
722
- }}
723
- }}
724
 
725
- function prevSlide() {{
726
- if (currentSlide > 0) {{
727
- currentSlide--;
728
- updateSlide();
729
- }}
730
- }}
731
 
732
- function nextSlide() {{
733
- if (currentSlide < totalSlides - 1) {{
734
- currentSlide++;
735
- updateSlide();
736
- }}
737
- }}
738
 
739
- function togglePlay() {{
740
- if (!audioElements[currentSlide]) return;
741
- if (currentAudio.paused) {{
742
- currentAudio.play().catch(e => console.error('Audio play failed:', e));
743
- }} else {{
744
- currentAudio.pause();
745
- }}
746
- }}
747
 
748
- audioElements.forEach((audio, index) => {{
749
- if (audio) {{
750
- audio.addEventListener('ended', () => {{
751
- if (index < totalSlides - 1) {{
752
- nextSlide();
753
- }}
754
- }});
755
  }}
756
  }});
757
- </script>
758
- """
759
- yield {
760
- "pdf": pdf_file,
761
- "html": html_output,
762
- "zip": zip_path
763
- }
764
- return
765
-
766
- except Exception as e:
767
- logger.error("Error during lecture generation: %s\n%s", str(e), traceback.format_exc())
768
- yield html_with_progress(f"Error during lecture generation: {str(e)}", progress)
769
- return
 
 
 
 
770
 
771
  # Gradio interface
772
  with gr.Blocks(title="Agent Feynman") as demo:
@@ -801,8 +843,6 @@ with gr.Blocks(title="Agent Feynman") as demo:
801
  </div>
802
  """
803
  slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html)
804
- pdf_display = PDF(label="Lecture Slides PDF")
805
- outputs_zip = gr.File(label="Download Outputs (PDF, Audio, Scripts)")
806
 
807
  speaker_audio.change(
808
  fn=update_audio_preview,
@@ -813,7 +853,7 @@ with gr.Blocks(title="Agent Feynman") as demo:
813
  generate_btn.click(
814
  fn=on_generate,
815
  inputs=[api_service, api_key, serpapi_key, title, topic, instructions, lecture_type, speaker_audio, num_slides],
816
- outputs=[slide_display, pdf_display, outputs_zip]
817
  )
818
 
819
  if __name__ == "__main__":
 
1
+ # Note: For Huggingface Spaces, ensure the Dockerfile includes:
2
+ # RUN mkdir -p /tmp/cache/
3
+ # RUN chmod a+rwx -R /tmp/cache/
4
+ # ENV TRANSFORMERS_CACHE=/tmp/cache/
5
  import os
6
  import json
7
  import re
 
10
  import logging
11
  import torch
12
  import random
 
 
13
  from serpapi import GoogleSearch
14
  from pydantic import BaseModel
15
  from autogen_agentchat.agents import AssistantAgent
 
23
  from markdown_pdf import MarkdownPdf, Section
24
  import traceback
25
  import soundfile as sf
26
+ import tempfile
27
  from pydub import AudioSegment
28
  from TTS.api import TTS
 
29
 
30
  # Set up logging
31
  logging.basicConfig(
 
38
  )
39
  logger = logging.getLogger(__name__)
40
 
41
+ # Set up environment for Huggingface Spaces
42
+ OUTPUT_DIR = "/data/outputs" # Persistent storage in Huggingface Spaces
43
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
44
  os.environ["COQUI_TOS_AGREED"] = "1"
45
+ gr.set_static_paths(paths=[OUTPUT_DIR]) # Expose OUTPUT_DIR for file access
46
 
47
  # Define Pydantic model for slide data
48
  class Slide(BaseModel):
 
87
  logger.error("Unexpected error during search: %s", str(e))
88
  return f"Unexpected error during search: {str(e)}"
89
 
90
+ # Define helper function for progress HTML
91
def html_with_progress(label, progress):
    """Return an HTML snippet showing a progress bar with a status label.

    Args:
        label: Status text shown in the heading under the bar. It is
            HTML-escaped before interpolation so that arbitrary text
            (for example an exception message) cannot inject markup.
        progress: Completion percentage used as the CSS width of the
            bar fill. Values outside 0-100 are clamped into that range.

    Returns:
        A string of HTML containing the bar and the label.
    """
    # Local import keeps this block self-contained; `html` is stdlib.
    from html import escape

    # Clamp so an out-of-range value cannot produce a negative or
    # overflowing width; in-range ints/floats pass through unchanged.
    bounded_progress = max(0, min(100, progress))
    safe_label = escape(str(label))

    return f"""
    <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
        <div style="width: 100%; background-color: #FFFFFF; border-radius: 10px; overflow: hidden; margin-bottom: 20px;">
            <div style="width: {bounded_progress}%; height: 30px; background-color: #4CAF50; border-radius: 10px;"></div>
        </div>
        <h2 style="font-style: italic; color: #555;">{safe_label}</h2>
    </div>
    """
100
+
101
  # Function to get model client based on selected service
102
  def get_model_client(service, api_key):
103
  if service == "OpenAI-gpt-4o-2024-08-06":
 
144
  return script
145
 
146
  # Helper function to validate and convert speaker audio (MP3 or WAV)
147
+ async def validate_and_convert_speaker_audio(speaker_audio):
148
  if not os.path.exists(speaker_audio):
149
  logger.error("Speaker audio file does not exist: %s", speaker_audio)
150
  return None
 
157
  audio = AudioSegment.from_mp3(speaker_audio)
158
  # Convert to mono, 22050 Hz
159
  audio = audio.set_channels(1).set_frame_rate(22050)
160
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
161
+ audio.export(temp_file.name, format="wav")
162
+ speaker_wav = temp_file.name
163
  elif ext == ".wav":
164
  speaker_wav = speaker_audio
165
  else:
 
177
  if data.ndim == 2:
178
  logger.info("Converting stereo WAV to mono: %s", speaker_wav)
179
  data = data.mean(axis=1)
180
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
181
+ sf.write(temp_file.name, data, samplerate)
182
+ speaker_wav = temp_file.name
183
 
184
  logger.info("Validated speaker audio: %s", speaker_wav)
185
  return speaker_wav
 
214
  match = re.search(pattern, content, re.DOTALL)
215
  if match:
216
  try:
217
+ parsed = json.loads(match.group(1))
218
+ logger.info("Parsed JSON from TextMessage: %s", parsed)
219
+ return parsed
220
  except json.JSONDecodeError as e:
221
  logger.error("Failed to parse JSON from TextMessage: %s, Content: %s", e, content)
222
  # Fallback: Try raw JSON array
 
224
  match = re.search(json_pattern, content, re.DOTALL)
225
  if match:
226
  try:
227
+ parsed = json.loads(match.group(0))
228
+ logger.info("Parsed fallback JSON from TextMessage: %s", parsed)
229
+ return parsed
230
  except json.JSONDecodeError as e:
231
  logger.error("Failed to parse fallback JSON from TextMessage: %s, Content: %s", e, content)
232
  # Fallback: Try any JSON-like structure
 
263
  match = re.search(pattern, content, re.DOTALL)
264
  if match:
265
  try:
266
+ parsed = json.loads(match.group(1))
267
+ logger.info("Parsed JSON from HandoffMessage context: %s", parsed)
268
+ return parsed
269
  except json.JSONDecodeError as e:
270
  logger.error("Failed to parse JSON from HandoffMessage context: %s, Content: %s", e, content)
271
  json_pattern = r"\[\s*\{.*?\}\s*\]"
272
  match = re.search(json_pattern, content, re.DOTALL)
273
  if match:
274
  try:
275
+ parsed = json.loads(match.group(0))
276
+ logger.info("Parsed fallback JSON from HandoffMessage context: %s", parsed)
277
+ return parsed
278
  except json.JSONDecodeError as e:
279
  logger.error("Failed to parse fallback JSON from HandoffMessage context: %s, Content: %s", e, content)
280
  try:
 
293
  return None
294
 
295
  # Function to generate Markdown and convert to PDF (portrait, centered)
296
+ def generate_slides_pdf(slides):
297
  pdf = MarkdownPdf()
298
 
299
  for slide in slides:
 
312
  """
313
  pdf.add_section(Section(markdown_content, toc=False))
314
 
315
+ pdf_file = os.path.join(OUTPUT_DIR, "slides.pdf")
316
+ try:
317
+ pdf.save(pdf_file)
318
+ logger.info("Generated PDF slides (portrait): %s", pdf_file)
319
+ return pdf_file
320
+ except Exception as e:
321
+ logger.error("Failed to generate PDF: %s", str(e))
322
+ raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
  # Async function to update audio preview
325
  async def update_audio_preview(audio_file):
 
331
  # Async function to generate lecture materials and audio
332
  async def on_generate(api_service, api_key, serpapi_key, title, topic, instructions, lecture_type, speaker_audio, num_slides):
333
  if not serpapi_key:
334
+ yield f"""
335
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
336
+ <h2 style="color: #d9534f;">SerpApi key required</h2>
337
+ <p style="margin-top: 20px;">Please provide a valid SerpApi key and try again.</p>
338
+ </div>
339
+ """
340
  return
341
 
342
+ # Ensure output directory exists
343
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
344
+ logger.info("Output directory set to: %s", OUTPUT_DIR)
 
 
 
 
 
 
 
 
 
345
 
346
+ # Initialize TTS model
347
+ tts = None
348
+ try:
349
+ device = "cuda" if torch.cuda.is_available() else "cpu"
350
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
351
+ logger.info("TTS model initialized on %s", device)
352
+ except Exception as e:
353
+ logger.error("Failed to initialize TTS model: %s", str(e))
354
+ yield f"""
355
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
356
+ <h2 style="color: #d9534f;">TTS model initialization failed</h2>
357
+ <p style="margin-top: 20px;">Error: {str(e)}</p>
358
+ <p>Please ensure the Coqui TTS model is properly installed and try again.</p>
359
+ </div>
360
+ """
361
+ return
362
+
363
+ model_client = get_model_client(api_service, api_key)
364
+
365
+ research_agent = AssistantAgent(
366
+ name="research_agent",
367
+ model_client=model_client,
368
+ handoffs=["slide_agent"],
369
+ system_message="You are a Research Agent. Use the search_web tool to gather information on the topic and keywords from the initial message. Summarize the findings concisely in a single message, then use the handoff_to_slide_agent tool to pass the task to the Slide Agent. Do not produce any other output.",
370
+ tools=[search_web]
371
+ )
372
+ slide_agent = AssistantAgent(
373
+ name="slide_agent",
374
+ model_client=model_client,
375
+ handoffs=["script_agent"],
376
+ system_message=f"""
377
+ You are a Slide Agent. Using the research from the conversation history, generate EXACTLY {num_slides} content slides, plus 1 quiz slide, 1 assignment slide, and 1 thank-you slide, for a TOTAL of {num_slides + 3} slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, with each slide as an object with 'title' and 'content' keys. Ensure the JSON is valid, contains EXACTLY {num_slides + 3} slides, and matches the specified count before proceeding. Do not include explanatory text, comments, or other messages. After outputting, use the handoff_to_script_agent tool.
378
  Example for 2 content slides:
379
  ```json
380
  [
 
385
  {{"title": "Thank You", "content": "Thank you message"}}
386
  ]
387
  ```""",
388
+ output_content_type=None,
389
+ reflect_on_tool_use=False
390
+ )
391
+ script_agent = AssistantAgent(
392
+ name="script_agent",
393
+ model_client=model_client,
394
+ handoffs=["feynman_agent"],
395
+ system_message=f"""
396
  You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {num_slides + 3} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
397
  Example for 1 content slide:
398
  ```json
 
403
  "Thanks for, um, attending today!"
404
  ]
405
  ```""",
406
+ output_content_type=None,
407
+ reflect_on_tool_use=False
408
+ )
409
+ feynman_agent = AssistantAgent(
410
+ name="feynman_agent",
411
+ model_client=model_client,
412
+ handoffs=[],
413
+ system_message=f"""
414
+ You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that EXACTLY {num_slides + 3} slides and {num_slides + 3} scripts are received. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing, invalid, or do not match the expected count ({num_slides + 3}), report the issue clearly. Use 'TERMINATE' to signal completion.
415
  Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture is coherent. TERMINATE'
416
  """)
417
+
418
+ swarm = Swarm(
419
+ participants=[research_agent, slide_agent, script_agent, feynman_agent],
420
+ termination_condition=HandoffTermination(target="user") | TextMentionTermination("TERMINATE")
421
+ )
422
+
423
+ progress = 0
424
+ label = "Research: in progress..."
425
+ yield html_with_progress(label, progress)
426
+ await asyncio.sleep(0.1)
427
+
428
+ initial_message = f"""
429
+ Lecture Title: {title}
430
+ Topic: {topic}
431
+ Additional Instructions: {instructions}
432
+ Audience: {lecture_type}
433
+ Number of Content Slides: {num_slides}
434
+ Please start by researching the topic.
435
+ """
436
+ logger.info("Starting lecture generation for topic: %s", topic)
437
+
438
+ slides = None
439
+ scripts = None
440
+ error_html = """
441
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
442
+ <h2 style="color: #d9534f;">Failed to generate lecture materials</h2>
443
+ <p style="margin-top: 20px;">Please try again with different parameters or a different model.</p>
444
+ </div>
445
+ """
446
+
447
+ try:
448
  max_slide_retries = 2
449
  slide_retry_count = 0
450
 
451
  while slide_retry_count <= max_slide_retries:
452
+ logger.info("Slide generation attempt %d/%d", slide_retry_count + 1, max_slide_retries)
453
+ task_result = await Console(swarm.run_stream(task=initial_message))
454
+ logger.info("Swarm execution completed")
455
+
456
+ script_retry_count = 0
457
+ max_script_retries = 2
458
+
459
+ for message in task_result.messages:
460
+ source = getattr(message, 'source', getattr(message, 'sender', None))
461
+ logger.debug("Processing message from %s, type: %s, content: %s", source, type(message), message.to_text() if hasattr(message, 'to_text') else str(message))
462
 
463
+ if isinstance(message, HandoffMessage):
464
+ logger.info("Handoff from %s to %s", source, message.target)
465
+ if source == "research_agent" and message.target == "slide_agent":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
  progress = 25
467
  label = "Slides: generating..."
468
  yield html_with_progress(label, progress)
469
  await asyncio.sleep(0.1)
470
+ elif source == "slide_agent" and message.target == "script_agent":
471
+ if slides is None:
472
+ logger.warning("Slide Agent handoff without slides JSON")
473
+ extracted_json = extract_json_from_message(message)
474
+ if extracted_json:
475
+ slides = extracted_json
476
+ logger.info("Extracted slides JSON from HandoffMessage context: %s", slides)
477
+ if slides is None:
478
+ label = "Slides: failed to generate..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
  yield html_with_progress(label, progress)
480
  await asyncio.sleep(0.1)
481
+ progress = 50
482
+ label = "Scripts: generating..."
483
+ yield html_with_progress(label, progress)
484
+ await asyncio.sleep(0.1)
485
+ elif source == "script_agent" and message.target == "feynman_agent":
486
+ if scripts is None:
487
+ logger.warning("Script Agent handoff without scripts JSON")
488
+ extracted_json = extract_json_from_message(message)
489
+ if extracted_json:
490
+ scripts = extracted_json
491
+ logger.info("Extracted scripts JSON from HandoffMessage context: %s", scripts)
492
+ progress = 75
493
+ label = "Review: in progress..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
  yield html_with_progress(label, progress)
495
  await asyncio.sleep(0.1)
496
 
497
+ elif source == "research_agent" and isinstance(message, TextMessage) and "handoff_to_slide_agent" in message.content:
498
+ logger.info("Research Agent completed research")
499
+ progress = 25
500
+ label = "Slides: generating..."
501
+ yield html_with_progress(label, progress)
502
+ await asyncio.sleep(0.1)
 
 
 
 
 
 
 
 
 
 
 
 
 
503
 
504
+ elif source == "slide_agent" and isinstance(message, (TextMessage, StructuredMessage)):
505
+ logger.debug("Slide Agent message received: %s", message.to_text())
506
+ extracted_json = extract_json_from_message(message)
507
+ if extracted_json:
508
+ slides = extracted_json
509
+ logger.info("Slide Agent generated %d slides: %s", len(slides), slides)
510
+ # Save slide content to individual files
511
+ for i, slide in enumerate(slides):
512
+ content_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_content.txt")
513
+ try:
514
+ with open(content_file, "w", encoding="utf-8") as f:
515
+ f.write(slide["content"])
516
+ logger.info("Saved slide content to %s: %s", content_file, slide["content"])
517
+ except Exception as e:
518
+ logger.error("Error saving slide content to %s: %s", content_file, str(e))
519
+ progress = 50
520
+ label = "Scripts: generating..."
521
+ yield html_with_progress(label, progress)
522
+ await asyncio.sleep(0.1)
523
+ else:
524
+ logger.warning("No JSON extracted from slide_agent message: %s", message.to_text())
525
 
526
+ elif source == "script_agent" and isinstance(message, (TextMessage, StructuredMessage)):
527
+ logger.debug("Script Agent message received: %s", message.to_text())
528
+ extracted_json = extract_json_from_message(message)
529
+ if extracted_json:
530
+ scripts = extracted_json
531
+ logger.info("Script Agent generated scripts for %d slides: %s", len(scripts), scripts)
532
+ # Save raw scripts to individual files
533
+ for i, script in enumerate(scripts):
534
+ script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_raw_script.txt")
535
+ try:
536
+ with open(script_file, "w", encoding="utf-8") as f:
537
+ f.write(script)
538
+ logger.info("Saved raw script to %s: %s", script_file, script)
539
+ except Exception as e:
540
+ logger.error("Error saving raw script to %s: %s", script_file, str(e))
541
+ progress = 75
542
+ label = "Scripts generated and saved. Reviewing..."
543
+ yield html_with_progress(label, progress)
544
+ await asyncio.sleep(0.1)
545
+ else:
546
+ logger.warning("No JSON extracted from script_agent message: %s", message.to_text())
547
+ if script_retry_count < max_script_retries:
548
+ script_retry_count += 1
549
+ logger.info("Retrying script generation (attempt %d/%d)", script_retry_count, max_script_retries)
550
+ retry_message = TextMessage(
551
+ content="Please generate scripts for the slides as per your instructions.",
552
+ source="user",
553
+ recipient="script_agent"
554
+ )
555
+ task_result.messages.append(retry_message)
556
+ continue
557
 
558
+ elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
559
+ logger.info("Feynman Agent completed lecture review: %s", message.content)
560
+ progress = 90
561
+ label = "Lecture materials ready. Generating audio..."
562
+ yield html_with_progress(label, progress)
563
+ await asyncio.sleep(0.1)
564
+
565
+ # Validate slide count
566
+ expected_slide_count = num_slides + 3
567
+ if slides and len(slides) == expected_slide_count:
568
+ logger.info("Slide count validated: %d slides received", len(slides))
569
+ break
570
+ else:
571
+ logger.warning("Incorrect slide count: expected %d, got %d", expected_slide_count, len(slides) if slides else 0)
572
+ slide_retry_count += 1
573
+ slides = None
574
+ if slide_retry_count <= max_slide_retries:
575
+ logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count + 1, max_slide_retries)
576
+ task_result = await Console(swarm.run_stream(task=initial_message))
577
+ else:
578
+ logger.error("Max slide retries reached")
579
+ yield f"""
580
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
581
+ <h2 style="color: #d9534f;">Incorrect number of slides</h2>
582
+ <p style="margin-top: 20px;">Expected {expected_slide_count} slides ({num_slides} content slides + quiz, assignment, thank-you), but generated {len(slides) if slides else 0}. Please try again with a different model.</p>
583
+ </div>
584
+ """
585
  return
586
+
587
+ logger.info("Slides state: %s", "Generated" if slides else "None")
588
+ logger.info("Scripts state: %s", "Generated" if scripts else "None")
589
+ if not slides or not scripts:
590
+ error_message = f"Failed to generate {'slides and scripts' if not slides and not scripts else 'slides' if not slides else 'scripts'}"
591
+ error_message += f". Received {len(slides) if slides else 0} slides and {len(scripts) if scripts else 0} scripts."
592
+ logger.error("%s", error_message)
593
+ logger.debug("Dumping all messages for debugging:")
594
+ for msg in task_result.messages:
595
+ source = getattr(msg, 'source', getattr(msg, 'sender', None))
596
+ logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
597
+ yield error_html
598
+ return
599
+
600
+ if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
601
+ logger.error("Scripts are not a list of strings: %s", scripts)
602
+ yield f"""
603
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
604
+ <h2 style="color: #d9534f;">Invalid script format</h2>
605
+ <p style="margin-top: 20px;">Scripts must be a list of strings. Please try again.</p>
606
+ </div>
607
+ """
608
+ return
609
+
610
+ if len(scripts) != expected_slide_count:
611
+ logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
612
+ yield f"""
613
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
614
+ <h2 style="color: #d9534f;">Mismatch in slides and scripts</h2>
615
+ <p style="margin-top: 20px;">Generated {len(slides)} slides but {len(scripts)} scripts. Please try again.</p>
616
+ </div>
617
+ """
618
+ return
619
+
620
+ # Generate PDF from slides
621
+ try:
622
+ pdf_file = generate_slides_pdf(slides)
623
+ except Exception as e:
624
+ logger.error("PDF generation failed: %s", str(e))
625
+ yield f"""
626
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
627
+ <h2 style="color: #d9534f;">PDF generation failed</h2>
628
+ <p style="margin-top: 20px;">Error: {str(e)}</p>
629
+ <p>Please try again or check the lecture_generation.log for details.</p>
630
+ </div>
631
+ """
632
+ return
633
+
634
+ audio_files = []
635
+ speaker_audio = speaker_audio if speaker_audio else "feynman.mp3"
636
+ validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
637
+ if not validated_speaker_wav:
638
+ logger.error("Invalid speaker audio after conversion, skipping TTS")
639
+ yield f"""
640
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
641
+ <h2 style="color: #d9534f;">Invalid speaker audio</h2>
642
+ <p style="margin-top: 20px;">Please upload a valid MP3 or WAV audio file and try again.</p>
643
+ </div>
644
+ """
645
+ return
646
+
647
+ # Process audio generation sequentially with retries
648
+ for i, script in enumerate(scripts):
649
+ cleaned_script = clean_script_text(script)
650
+ audio_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}.wav")
651
+ script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_script.txt")
652
+
653
+ # Save cleaned script
654
+ try:
655
+ with open(script_file, "w", encoding="utf-8") as f:
656
+ f.write(cleaned_script or "")
657
+ logger.info("Saved cleaned script to %s: %s", script_file, cleaned_script)
658
+ except Exception as e:
659
+ logger.error("Error saving cleaned script to %s: %s", script_file, str(e))
660
+
661
+ if not cleaned_script:
662
+ logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
663
+ audio_files.append(None)
664
+ progress = 90 + ((i + 1) / len(scripts)) * 10
665
+ label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
666
+ yield html_with_progress(label, progress)
667
+ await asyncio.sleep(0.1)
668
+ continue
669
+
670
+ max_retries = 2
671
+ for attempt in range(max_retries + 1):
672
+ try:
673
+ current_text = cleaned_script
674
+ if attempt > 0:
675
+ sentences = re.split(r"[.!?]+", cleaned_script)
676
+ sentences = [s.strip() for s in sentences if s.strip()][:2]
677
+ current_text = ". ".join(sentences) + "."
678
+ logger.info("Retry %d for slide %d with simplified text: %s", attempt, i + 1, current_text)
679
 
680
+ success = generate_xtts_audio(tts, current_text, validated_speaker_wav, audio_file)
681
+ if not success:
682
+ raise RuntimeError("TTS generation failed")
 
 
 
 
683
 
684
+ logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
685
+ audio_files.append(audio_file)
686
+ progress = 90 + ((i + 1) / len(scripts)) * 10
687
+ label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
688
+ yield html_with_progress(label, progress)
689
+ await asyncio.sleep(0.1)
690
+ break
691
+ except Exception as e:
692
+ logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
693
+ if attempt == max_retries:
694
+ logger.error("Max retries reached for slide %d, skipping", i + 1)
695
  audio_files.append(None)
696
  progress = 90 + ((i + 1) / len(scripts)) * 10
697
  label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
698
  yield html_with_progress(label, progress)
699
  await asyncio.sleep(0.1)
700
+ break
701
+
702
+ # Prepare output HTML with gr.File for PDF and gr.FileExplorer for outputs
703
+ slides_info = json.dumps({"slides": [
704
+ {"title": slide["title"], "content": slide["content"]}
705
+ for slide in slides
706
+ ], "audioFiles": audio_files})
707
+
708
+ html_output = f"""
709
+ <div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between; padding: 20px;">
710
+ <div style="flex: 1; overflow: auto;">
711
+ <h3>Lecture Slides</h3>
712
+ <p>Download or view the slides PDF below (opens in your browser's PDF viewer):</p>
713
+ <gradio-file value="{pdf_file}" label="Slides PDF" file_types=[".pdf"]></gradio-file>
714
+ <h3>Generated Files</h3>
715
+ <p>Explore all generated files (PDF, audio, scripts) in the output directory:</p>
716
+ <gradio-file-explorer glob="/data/outputs/*" label="Output Directory"></gradio-file-explorer>
717
+ </div>
718
+ <div style="padding: 20px;">
719
+ <div id="progress-bar" style="width: 100%; height: 5px; background-color: #ddd; border-radius: 2px; margin-bottom: 10px;">
720
+ <div id="progress-fill" style="width: {(1/len(slides)*100)}%; height: 100%; background-color: #4CAF50; border-radius: 2px;"></div>
721
+ </div>
722
+ <div style="display: flex; justify-content: center; margin-bottom: 10px;">
723
+ <button onclick="prevSlide()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏮</button>
724
+ <button onclick="togglePlay()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏯</button>
725
+ <button onclick="nextSlide()" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer;">⏭</button>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726
  </div>
727
+ <p id="slide-counter" style="text-align: center;">Slide 1 of {len(slides)}</p>
728
+ </div>
729
+ </div>
730
+ <script>
731
+ const lectureData = {slides_info};
732
+ let currentSlide = 0;
733
+ const totalSlides = lectureData.slides.length;
734
+ const slideCounter = document.getElementById('slide-counter');
735
+ const progressFill = document.getElementById('progress-fill');
736
+ let audioElements = [];
737
+ let currentAudio = null;
738
 
739
+ for (let i = 0; i < totalSlides; i++) {{
740
+ if (lectureData.audioFiles && lectureData.audioFiles[i]) {{
741
+ const audio = new Audio('/gradio_api/file=' + lectureData.audioFiles[i]);
742
+ audioElements.push(audio);
743
+ }} else {{
744
+ audioElements.push(null);
745
+ }}
746
+ }}
747
 
748
+ function updateSlide() {{
749
+ slideCounter.textContent = `Slide ${{currentSlide + 1}} of ${{totalSlides}}`;
750
+ progressFill.style.width = `${{(currentSlide + 1) / totalSlides * 100}}%`;
751
 
752
+ if (currentAudio) {{
753
+ currentAudio.pause();
754
+ currentAudio.currentTime = 0;
755
+ }}
756
 
757
+ if (audioElements[currentSlide]) {{
758
+ currentAudio = audioElements[currentSlide];
759
+ currentAudio.play().catch(e => console.error('Audio play failed:', e));
760
+ }} else {{
761
+ currentAudio = null;
762
+ }}
763
+ }}
764
 
765
+ function prevSlide() {{
766
+ if (currentSlide > 0) {{
767
+ currentSlide--;
768
+ updateSlide();
769
+ }}
770
+ }}
771
 
772
+ function nextSlide() {{
773
+ if (currentSlide < totalSlides - 1) {{
774
+ currentSlide++;
775
+ updateSlide();
776
+ }}
777
+ }}
778
 
779
+ function togglePlay() {{
780
+ if (!audioElements[currentSlide]) return;
781
+ if (currentAudio.paused) {{
782
+ currentAudio.play().catch(e => console.error('Audio play failed:', e));
783
+ }} else {{
784
+ currentAudio.pause();
785
+ }}
786
+ }}
787
 
788
+ audioElements.forEach((audio, index) => {{
789
+ if (audio) {{
790
+ audio.addEventListener('ended', () => {{
791
+ if (index < totalSlides - 1) {{
792
+ nextSlide();
 
 
793
  }}
794
  }});
795
+ }}
796
+ }});
797
+ </script>
798
+ """
799
+ logger.info("Lecture generation completed successfully")
800
+ yield html_output
801
+
802
+ except Exception as e:
803
+ logger.error("Error during lecture generation: %s\n%s", str(e), traceback.format_exc())
804
+ yield f"""
805
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
806
+ <h2 style="color: #d9534f;">Error during lecture generation</h2>
807
+ <p style="margin-top: 10px; font-size: 16px;">{str(e)}</p>
808
+ <p style="margin-top: 20px;">Please try again or check the lecture_generation.log for details.</p>
809
+ </div>
810
+ """
811
+ return
812
 
813
  # Gradio interface
814
  with gr.Blocks(title="Agent Feynman") as demo:
 
843
  </div>
844
  """
845
  slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html)
 
 
846
 
847
  speaker_audio.change(
848
  fn=update_audio_preview,
 
853
  generate_btn.click(
854
  fn=on_generate,
855
  inputs=[api_service, api_key, serpapi_key, title, topic, instructions, lecture_type, speaker_audio, num_slides],
856
+ outputs=[slide_display]
857
  )
858
 
859
  if __name__ == "__main__":