Jaward committed on
Commit
3c6142d
·
verified ·
1 Parent(s): 89a534d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +268 -240
app.py CHANGED
@@ -1,7 +1,3 @@
1
- # Note: For Huggingface Spaces, ensure the Dockerfile includes:
2
- # RUN mkdir -p /tmp/cache/
3
- # RUN chmod a+rwx -R /tmp/cache/
4
- # ENV TRANSFORMERS_CACHE=/tmp/cache/
5
  import os
6
  import json
7
  import re
@@ -38,11 +34,15 @@ logging.basicConfig(
38
  )
39
  logger = logging.getLogger(__name__)
40
 
41
- # Set up environment for Huggingface Spaces
42
- OUTPUT_DIR = "/data/outputs" # Persistent storage in Huggingface Spaces
 
 
 
 
43
  os.makedirs(OUTPUT_DIR, exist_ok=True)
 
44
  os.environ["COQUI_TOS_AGREED"] = "1"
45
- gr.set_static_paths(paths=[OUTPUT_DIR]) # Expose OUTPUT_DIR for file access
46
 
47
  # Define Pydantic model for slide data
48
  class Slide(BaseModel):
@@ -145,9 +145,14 @@ def clean_script_text(script):
145
 
146
  # Helper function to validate and convert speaker audio (MP3 or WAV)
147
  async def validate_and_convert_speaker_audio(speaker_audio):
148
- if not os.path.exists(speaker_audio):
149
- logger.error("Speaker audio file does not exist: %s", speaker_audio)
150
- return None
 
 
 
 
 
151
 
152
  try:
153
  # Check file extension
@@ -157,7 +162,7 @@ async def validate_and_convert_speaker_audio(speaker_audio):
157
  audio = AudioSegment.from_mp3(speaker_audio)
158
  # Convert to mono, 22050 Hz
159
  audio = audio.set_channels(1).set_frame_rate(22050)
160
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
161
  audio.export(temp_file.name, format="wav")
162
  speaker_wav = temp_file.name
163
  elif ext == ".wav":
@@ -177,7 +182,7 @@ async def validate_and_convert_speaker_audio(speaker_audio):
177
  if data.ndim == 2:
178
  logger.info("Converting stereo WAV to mono: %s", speaker_wav)
179
  data = data.mean(axis=1)
180
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
181
  sf.write(temp_file.name, data, samplerate)
182
  speaker_wav = temp_file.name
183
 
@@ -209,35 +214,52 @@ def extract_json_from_message(message):
209
  if not isinstance(content, str):
210
  logger.warning("TextMessage content is not a string: %s", content)
211
  return None
212
- # Try standard JSON block
 
213
  pattern = r"```json\s*(.*?)\s*```"
214
  match = re.search(pattern, content, re.DOTALL)
215
  if match:
216
  try:
217
- parsed = json.loads(match.group(1))
218
- logger.info("Parsed JSON from TextMessage: %s", parsed)
219
- return parsed
220
  except json.JSONDecodeError as e:
221
- logger.error("Failed to parse JSON from TextMessage: %s, Content: %s", e, content)
222
- # Fallback: Try raw JSON array
223
- json_pattern = r"\[\s*\{.*?\}\s*\]"
224
- match = re.search(json_pattern, content, re.DOTALL)
225
- if match:
226
- try:
227
- parsed = json.loads(match.group(0))
228
- logger.info("Parsed fallback JSON from TextMessage: %s", parsed)
229
- return parsed
230
- except json.JSONDecodeError as e:
231
- logger.error("Failed to parse fallback JSON from TextMessage: %s, Content: %s", e, content)
232
- # Fallback: Try any JSON-like structure
 
 
 
 
 
 
 
233
  try:
234
- parsed = json.loads(content)
235
- if isinstance(parsed, (list, dict)):
236
- logger.info("Parsed JSON from raw content: %s", parsed)
237
- return parsed
238
- except json.JSONDecodeError:
239
- pass
240
- logger.warning("No JSON found in TextMessage content: %s", content)
 
 
 
 
 
 
 
 
 
241
  return None
242
 
243
  elif isinstance(message, StructuredMessage):
@@ -257,69 +279,59 @@ def extract_json_from_message(message):
257
  for ctx_msg in message.context:
258
  if hasattr(ctx_msg, "content"):
259
  content = ctx_msg.content
260
- logger.debug("Handoff context message content: %s", content)
261
  if isinstance(content, str):
262
  pattern = r"```json\s*(.*?)\s*```"
263
  match = re.search(pattern, content, re.DOTALL)
264
  if match:
265
  try:
266
- parsed = json.loads(match.group(1))
267
- logger.info("Parsed JSON from HandoffMessage context: %s", parsed)
268
- return parsed
269
- except json.JSONDecodeError as e:
270
- logger.error("Failed to parse JSON from HandoffMessage context: %s, Content: %s", e, content)
271
- json_pattern = r"\[\s*\{.*?\}\s*\]"
272
- match = re.search(json_pattern, content, re.DOTALL)
273
- if match:
274
- try:
275
- parsed = json.loads(match.group(0))
276
- logger.info("Parsed fallback JSON from HandoffMessage context: %s", parsed)
277
- return parsed
278
  except json.JSONDecodeError as e:
279
- logger.error("Failed to parse fallback JSON from HandoffMessage context: %s, Content: %s", e, content)
280
- try:
281
- parsed = json.loads(content)
282
- if isinstance(parsed, (list, dict)):
283
- logger.info("Parsed JSON from raw HandoffMessage context: %s", parsed)
284
- return parsed
285
- except json.JSONDecodeError:
286
- pass
 
 
 
 
 
 
287
  elif isinstance(content, dict):
288
  return content.get("slides", content)
 
289
  logger.warning("No JSON found in HandoffMessage context")
290
  return None
291
 
292
  logger.warning("Unsupported message type for JSON extraction: %s", type(message))
293
  return None
294
 
295
- # Function to generate Markdown and convert to PDF (portrait, centered)
296
- def generate_slides_pdf(slides):
297
- pdf = MarkdownPdf()
298
-
299
- for slide in slides:
300
- content_lines = slide['content'].replace('\n', '\n\n')
301
- markdown_content = f"""
302
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; text-align: center; padding: 20px;">
303
- # {slide['title']}
304
-
305
- *Prof. AI Feynman*
306
- *Princeton University, April 26th, 2025*
307
-
308
- {content_lines}
309
  </div>
310
-
311
- ---
312
  """
313
- pdf.add_section(Section(markdown_content, toc=False))
314
-
315
- pdf_file = os.path.join(OUTPUT_DIR, "slides.pdf")
316
- try:
317
- pdf.save(pdf_file)
318
- logger.info("Generated PDF slides (portrait): %s", pdf_file)
319
- return pdf_file
320
  except Exception as e:
321
- logger.error("Failed to generate PDF: %s", str(e))
322
- raise
 
323
 
324
  # Async function to update audio preview
325
  async def update_audio_preview(audio_file):
@@ -339,10 +351,6 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
339
  """
340
  return
341
 
342
- # Ensure output directory exists
343
- os.makedirs(OUTPUT_DIR, exist_ok=True)
344
- logger.info("Output directory set to: %s", OUTPUT_DIR)
345
-
346
  # Initialize TTS model
347
  tts = None
348
  try:
@@ -362,6 +370,9 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
362
 
363
  model_client = get_model_client(api_service, api_key)
364
 
 
 
 
365
  research_agent = AssistantAgent(
366
  name="research_agent",
367
  model_client=model_client,
@@ -374,8 +385,8 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
374
  model_client=model_client,
375
  handoffs=["script_agent"],
376
  system_message=f"""
377
- You are a Slide Agent. Using the research from the conversation history, generate EXACTLY {num_slides} content slides, plus 1 quiz slide, 1 assignment slide, and 1 thank-you slide, for a TOTAL of {num_slides + 3} slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, with each slide as an object with 'title' and 'content' keys. Ensure the JSON is valid, contains EXACTLY {num_slides + 3} slides, and matches the specified count before proceeding. Do not include explanatory text, comments, or other messages. After outputting, use the handoff_to_script_agent tool.
378
- Example for 2 content slides:
379
  ```json
380
  [
381
  {{"title": "Slide 1", "content": "Content for slide 1"}},
@@ -393,11 +404,13 @@ Example for 2 content slides:
393
  model_client=model_client,
394
  handoffs=["feynman_agent"],
395
  system_message=f"""
396
- You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {num_slides + 3} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
397
- Example for 1 content slide:
398
  ```json
399
  [
400
  "So, this slide, um, covers the main topic in a fun way.",
 
 
401
  "Alright, you know, answer these quiz questions.",
402
  "Here's your, like, assignment to complete.",
403
  "Thanks for, um, attending today!"
@@ -411,8 +424,8 @@ Example for 1 content slide:
411
  model_client=model_client,
412
  handoffs=[],
413
  system_message=f"""
414
- You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that EXACTLY {num_slides + 3} slides and {num_slides + 3} scripts are received. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing, invalid, or do not match the expected count ({num_slides + 3}), report the issue clearly. Use 'TERMINATE' to signal completion.
415
- Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture is coherent. TERMINATE'
416
  """)
417
 
418
  swarm = Swarm(
@@ -430,10 +443,10 @@ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture
430
  Topic: {topic}
431
  Additional Instructions: {instructions}
432
  Audience: {lecture_type}
433
- Number of Content Slides: {num_slides}
434
  Please start by researching the topic.
435
  """
436
- logger.info("Starting lecture generation for topic: %s", topic)
437
 
438
  slides = None
439
  scripts = None
@@ -445,144 +458,147 @@ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture
445
  """
446
 
447
  try:
448
- max_slide_retries = 2
 
 
 
449
  slide_retry_count = 0
 
 
450
 
451
- while slide_retry_count <= max_slide_retries:
452
- logger.info("Slide generation attempt %d/%d", slide_retry_count + 1, max_slide_retries)
453
- task_result = await Console(swarm.run_stream(task=initial_message))
454
- logger.info("Swarm execution completed")
455
 
456
- script_retry_count = 0
457
- max_script_retries = 2
458
-
459
- for message in task_result.messages:
460
- source = getattr(message, 'source', getattr(message, 'sender', None))
461
- logger.debug("Processing message from %s, type: %s, content: %s", source, type(message), message.to_text() if hasattr(message, 'to_text') else str(message))
462
-
463
- if isinstance(message, HandoffMessage):
464
- logger.info("Handoff from %s to %s", source, message.target)
465
- if source == "research_agent" and message.target == "slide_agent":
466
- progress = 25
467
- label = "Slides: generating..."
468
- yield html_with_progress(label, progress)
469
- await asyncio.sleep(0.1)
470
- elif source == "slide_agent" and message.target == "script_agent":
471
- if slides is None:
472
- logger.warning("Slide Agent handoff without slides JSON")
473
- extracted_json = extract_json_from_message(message)
474
- if extracted_json:
475
- slides = extracted_json
476
- logger.info("Extracted slides JSON from HandoffMessage context: %s", slides)
477
- if slides is None:
478
- label = "Slides: failed to generate..."
479
- yield html_with_progress(label, progress)
480
- await asyncio.sleep(0.1)
481
- progress = 50
482
- label = "Scripts: generating..."
483
- yield html_with_progress(label, progress)
484
- await asyncio.sleep(0.1)
485
- elif source == "script_agent" and message.target == "feynman_agent":
486
- if scripts is None:
487
- logger.warning("Script Agent handoff without scripts JSON")
488
- extracted_json = extract_json_from_message(message)
489
- if extracted_json:
490
- scripts = extracted_json
491
- logger.info("Extracted scripts JSON from HandoffMessage context: %s", scripts)
492
- progress = 75
493
- label = "Review: in progress..."
494
- yield html_with_progress(label, progress)
495
- await asyncio.sleep(0.1)
496
-
497
- elif source == "research_agent" and isinstance(message, TextMessage) and "handoff_to_slide_agent" in message.content:
498
- logger.info("Research Agent completed research")
499
  progress = 25
500
  label = "Slides: generating..."
501
  yield html_with_progress(label, progress)
502
  await asyncio.sleep(0.1)
503
-
504
- elif source == "slide_agent" and isinstance(message, (TextMessage, StructuredMessage)):
505
- logger.debug("Slide Agent message received: %s", message.to_text())
506
- extracted_json = extract_json_from_message(message)
507
- if extracted_json:
508
- slides = extracted_json
509
- logger.info("Slide Agent generated %d slides: %s", len(slides), slides)
510
- # Save slide content to individual files
511
- for i, slide in enumerate(slides):
512
- content_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_content.txt")
513
- try:
514
- with open(content_file, "w", encoding="utf-8") as f:
515
- f.write(slide["content"])
516
- logger.info("Saved slide content to %s: %s", content_file, slide["content"])
517
- except Exception as e:
518
- logger.error("Error saving slide content to %s: %s", content_file, str(e))
519
- progress = 50
520
- label = "Scripts: generating..."
521
- yield html_with_progress(label, progress)
522
- await asyncio.sleep(0.1)
523
- else:
524
- logger.warning("No JSON extracted from slide_agent message: %s", message.to_text())
525
-
526
- elif source == "script_agent" and isinstance(message, (TextMessage, StructuredMessage)):
527
- logger.debug("Script Agent message received: %s", message.to_text())
528
- extracted_json = extract_json_from_message(message)
529
- if extracted_json:
530
- scripts = extracted_json
531
- logger.info("Script Agent generated scripts for %d slides: %s", len(scripts), scripts)
532
- # Save raw scripts to individual files
533
- for i, script in enumerate(scripts):
534
- script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_raw_script.txt")
535
- try:
536
- with open(script_file, "w", encoding="utf-8") as f:
537
- f.write(script)
538
- logger.info("Saved raw script to %s: %s", script_file, script)
539
- except Exception as e:
540
- logger.error("Error saving raw script to %s: %s", script_file, str(e))
541
- progress = 75
542
- label = "Scripts generated and saved. Reviewing..."
543
- yield html_with_progress(label, progress)
544
- await asyncio.sleep(0.1)
545
- else:
546
- logger.warning("No JSON extracted from script_agent message: %s", message.to_text())
547
- if script_retry_count < max_script_retries:
548
- script_retry_count += 1
549
- logger.info("Retrying script generation (attempt %d/%d)", script_retry_count, max_script_retries)
550
  retry_message = TextMessage(
551
- content="Please generate scripts for the slides as per your instructions.",
552
  source="user",
553
- recipient="script_agent"
554
  )
555
  task_result.messages.append(retry_message)
556
  continue
557
-
558
- elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
559
- logger.info("Feynman Agent completed lecture review: %s", message.content)
560
- progress = 90
561
- label = "Lecture materials ready. Generating audio..."
562
  yield html_with_progress(label, progress)
563
  await asyncio.sleep(0.1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564
 
565
- # Validate slide count
566
- expected_slide_count = num_slides + 3
567
- if slides and len(slides) == expected_slide_count:
568
- logger.info("Slide count validated: %d slides received", len(slides))
569
- break
570
- else:
571
- logger.warning("Incorrect slide count: expected %d, got %d", expected_slide_count, len(slides) if slides else 0)
572
- slide_retry_count += 1
573
- slides = None
574
- if slide_retry_count <= max_slide_retries:
575
- logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count + 1, max_slide_retries)
576
- task_result = await Console(swarm.run_stream(task=initial_message))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577
  else:
578
- logger.error("Max slide retries reached")
579
- yield f"""
580
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
581
- <h2 style="color: #d9534f;">Incorrect number of slides</h2>
582
- <p style="margin-top: 20px;">Expected {expected_slide_count} slides ({num_slides} content slides + quiz, assignment, thank-you), but generated {len(slides) if slides else 0}. Please try again with a different model.</p>
583
- </div>
584
- """
585
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
586
 
587
  logger.info("Slides state: %s", "Generated" if slides else "None")
588
  logger.info("Scripts state: %s", "Generated" if scripts else "None")
@@ -594,7 +610,22 @@ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture
594
  for msg in task_result.messages:
595
  source = getattr(msg, 'source', getattr(msg, 'sender', None))
596
  logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
597
- yield error_html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
  return
599
 
600
  if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
@@ -607,7 +638,7 @@ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture
607
  """
608
  return
609
 
610
- if len(scripts) != expected_slide_count:
611
  logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
612
  yield f"""
613
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
@@ -617,22 +648,19 @@ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture
617
  """
618
  return
619
 
620
- # Generate PDF from slides
621
- try:
622
- pdf_file = generate_slides_pdf(slides)
623
- except Exception as e:
624
- logger.error("PDF generation failed: %s", str(e))
625
  yield f"""
626
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
627
- <h2 style="color: #d9534f;">PDF generation failed</h2>
628
- <p style="margin-top: 20px;">Error: {str(e)}</p>
629
- <p>Please try again or check the lecture_generation.log for details.</p>
630
  </div>
631
  """
632
  return
633
 
634
  audio_files = []
635
- speaker_audio = speaker_audio if speaker_audio else "feynman.mp3"
636
  validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
637
  if not validated_speaker_wav:
638
  logger.error("Invalid speaker audio after conversion, skipping TTS")
@@ -667,8 +695,8 @@ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture
667
  await asyncio.sleep(0.1)
668
  continue
669
 
670
- max_retries = 2
671
- for attempt in range(max_retries + 1):
672
  try:
673
  current_text = cleaned_script
674
  if attempt > 0:
@@ -690,7 +718,7 @@ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture
690
  break
691
  except Exception as e:
692
  logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
693
- if attempt == max_retries:
694
  logger.error("Max retries reached for slide %d, skipping", i + 1)
695
  audio_files.append(None)
696
  progress = 90 + ((i + 1) / len(scripts)) * 10
@@ -699,21 +727,15 @@ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture
699
  await asyncio.sleep(0.1)
700
  break
701
 
702
- # Prepare output HTML with gr.File for PDF and gr.FileExplorer for outputs
703
  slides_info = json.dumps({"slides": [
704
  {"title": slide["title"], "content": slide["content"]}
705
  for slide in slides
706
  ], "audioFiles": audio_files})
707
 
708
  html_output = f"""
709
- <div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between; padding: 20px;">
710
- <div style="flex: 1; overflow: auto;">
711
- <h3>Lecture Slides</h3>
712
- <p>Download or view the slides PDF below (opens in your browser's PDF viewer):</p>
713
- <gradio-file value="{pdf_file}" label="Slides PDF" file_types=[".pdf"]></gradio-file>
714
- <h3>Generated Files</h3>
715
- <p>Explore all generated files (PDF, audio, scripts) in the output directory:</p>
716
- <gradio-file-explorer glob="/data/outputs/*" label="Output Directory"></gradio-file-explorer>
717
  </div>
718
  <div style="padding: 20px;">
719
  <div id="progress-bar" style="width: 100%; height: 5px; background-color: #ddd; border-radius: 2px; margin-bottom: 10px;">
@@ -738,7 +760,7 @@ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture
738
 
739
  for (let i = 0; i < totalSlides; i++) {{
740
  if (lectureData.audioFiles && lectureData.audioFiles[i]) {{
741
- const audio = new Audio('/gradio_api/file=' + lectureData.audioFiles[i]);
742
  audioElements.push(audio);
743
  }} else {{
744
  audioElements.push(null);
@@ -746,6 +768,9 @@ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture
746
  }}
747
 
748
  function updateSlide() {{
 
 
 
749
  slideCounter.textContent = `Slide ${{currentSlide + 1}} of ${{totalSlides}}`;
750
  progressFill.style.width = `${{(currentSlide + 1) / totalSlides * 100}}%`;
751
 
@@ -794,6 +819,9 @@ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture
794
  }});
795
  }}
796
  }});
 
 
 
797
  </script>
798
  """
799
  logger.info("Lecture generation completed successfully")
@@ -805,7 +833,7 @@ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture
805
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
806
  <h2 style="color: #d9534f;">Error during lecture generation</h2>
807
  <p style="margin-top: 10px; font-size: 16px;">{str(e)}</p>
808
- <p style="margin-top: 20px;">Please try again or check the lecture_generation.log for details.</p>
809
  </div>
810
  """
811
  return
 
 
 
 
 
1
  import os
2
  import json
3
  import re
 
34
  )
35
  logger = logging.getLogger(__name__)
36
 
37
+ # Set up environment
38
+ # For Huggingface Spaces, use /tmp for temporary storage
39
+ if os.path.exists("/tmp"):
40
+ OUTPUT_DIR = "/tmp/outputs" # Use /tmp for Huggingface Spaces
41
+ else:
42
+ OUTPUT_DIR = os.path.join(os.getcwd(), "outputs") # Fallback for local dev
43
  os.makedirs(OUTPUT_DIR, exist_ok=True)
44
+ logger.info(f"Using output directory: {OUTPUT_DIR}")
45
  os.environ["COQUI_TOS_AGREED"] = "1"
 
46
 
47
  # Define Pydantic model for slide data
48
  class Slide(BaseModel):
 
145
 
146
  # Helper function to validate and convert speaker audio (MP3 or WAV)
147
  async def validate_and_convert_speaker_audio(speaker_audio):
148
+ if not speaker_audio or not os.path.exists(speaker_audio):
149
+ logger.warning("Speaker audio file does not exist: %s. Using default voice.", speaker_audio)
150
+ default_voice = os.path.join(os.path.dirname(__file__), "feynman.mp3")
151
+ if os.path.exists(default_voice):
152
+ speaker_audio = default_voice
153
+ else:
154
+ logger.error("Default voice not found. Cannot proceed with TTS.")
155
+ return None
156
 
157
  try:
158
  # Check file extension
 
162
  audio = AudioSegment.from_mp3(speaker_audio)
163
  # Convert to mono, 22050 Hz
164
  audio = audio.set_channels(1).set_frame_rate(22050)
165
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False, dir=OUTPUT_DIR) as temp_file:
166
  audio.export(temp_file.name, format="wav")
167
  speaker_wav = temp_file.name
168
  elif ext == ".wav":
 
182
  if data.ndim == 2:
183
  logger.info("Converting stereo WAV to mono: %s", speaker_wav)
184
  data = data.mean(axis=1)
185
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False, dir=OUTPUT_DIR) as temp_file:
186
  sf.write(temp_file.name, data, samplerate)
187
  speaker_wav = temp_file.name
188
 
 
214
  if not isinstance(content, str):
215
  logger.warning("TextMessage content is not a string: %s", content)
216
  return None
217
+
218
+ # Try standard JSON block with triple backticks
219
  pattern = r"```json\s*(.*?)\s*```"
220
  match = re.search(pattern, content, re.DOTALL)
221
  if match:
222
  try:
223
+ json_str = match.group(1).strip()
224
+ logger.debug("Found JSON in code block: %s", json_str)
225
+ return json.loads(json_str)
226
  except json.JSONDecodeError as e:
227
+ logger.error("Failed to parse JSON from code block: %s", e)
228
+
229
+ # Try to find arrays or objects
230
+ json_patterns = [
231
+ r"\[\s*\{.*?\}\s*\]", # Array of objects
232
+ r"\{\s*\".*?\"\s*:.*?\}", # Object
233
+ ]
234
+
235
+ for pattern in json_patterns:
236
+ match = re.search(pattern, content, re.DOTALL)
237
+ if match:
238
+ try:
239
+ json_str = match.group(0).strip()
240
+ logger.debug("Found JSON with pattern %s: %s", pattern, json_str)
241
+ return json.loads(json_str)
242
+ except json.JSONDecodeError as e:
243
+ logger.error("Failed to parse JSON with pattern %s: %s", pattern, e)
244
+
245
+ # Try to find JSON anywhere in the content
246
  try:
247
+ for i in range(len(content)):
248
+ for j in range(len(content), i, -1):
249
+ substring = content[i:j].strip()
250
+ if (substring.startswith('{') and substring.endswith('}')) or \
251
+ (substring.startswith('[') and substring.endswith(']')):
252
+ try:
253
+ parsed = json.loads(substring)
254
+ if isinstance(parsed, (list, dict)):
255
+ logger.info("Found JSON in substring: %s", substring)
256
+ return parsed
257
+ except json.JSONDecodeError:
258
+ continue
259
+ except Exception as e:
260
+ logger.error("Error in JSON substring search: %s", e)
261
+
262
+ logger.warning("No JSON found in TextMessage content")
263
  return None
264
 
265
  elif isinstance(message, StructuredMessage):
 
279
  for ctx_msg in message.context:
280
  if hasattr(ctx_msg, "content"):
281
  content = ctx_msg.content
282
+ logger.debug("HandoffMessage context content: %s", content)
283
  if isinstance(content, str):
284
  pattern = r"```json\s*(.*?)\s*```"
285
  match = re.search(pattern, content, re.DOTALL)
286
  if match:
287
  try:
288
+ return json.loads(match.group(1))
 
 
 
 
 
 
 
 
 
 
 
289
  except json.JSONDecodeError as e:
290
+ logger.error("Failed to parse JSON from HandoffMessage: %s", e)
291
+
292
+ json_patterns = [
293
+ r"\[\s*\{.*?\}\s*\]", # Array of objects
294
+ r"\{\s*\".*?\"\s*:.*?\}", # Object
295
+ ]
296
+
297
+ for pattern in json_patterns:
298
+ match = re.search(pattern, content, re.DOTALL)
299
+ if match:
300
+ try:
301
+ return json.loads(match.group(0))
302
+ except json.JSONDecodeError as e:
303
+ logger.error("Failed to parse JSON with pattern %s: %s", pattern, e)
304
  elif isinstance(content, dict):
305
  return content.get("slides", content)
306
+
307
  logger.warning("No JSON found in HandoffMessage context")
308
  return None
309
 
310
  logger.warning("Unsupported message type for JSON extraction: %s", type(message))
311
  return None
312
 
313
+ # Function to generate HTML slides
314
+ def generate_html_slides(slides, title):
315
+ try:
316
+ slides_html = ""
317
+
318
+ for i, slide in enumerate(slides):
319
+ content_lines = slide['content'].replace('\n', '<br>')
320
+ slide_html = f"""
321
+ <div id="slide-{i+1}" class="slide" style="display: none; height: 100%; padding: 20px; text-align: center;">
322
+ <h1 style="margin-bottom: 10px;">{slide['title']}</h1>
323
+ <h3 style="margin-bottom: 20px; font-style: italic;">Prof. AI Feynman<br>Princeton University, April 26th, 2025</h3>
324
+ <div style="font-size: 1.2em; line-height: 1.6;">{content_lines}</div>
 
 
325
  </div>
 
 
326
  """
327
+ slides_html += slide_html
328
+
329
+ logger.info(f"Generated HTML slides for: {title}")
330
+ return slides_html
 
 
 
331
  except Exception as e:
332
+ logger.error(f"Failed to generate HTML slides: {str(e)}")
333
+ logger.error(traceback.format_exc())
334
+ return None
335
 
336
  # Async function to update audio preview
337
  async def update_audio_preview(audio_file):
 
351
  """
352
  return
353
 
 
 
 
 
354
  # Initialize TTS model
355
  tts = None
356
  try:
 
370
 
371
  model_client = get_model_client(api_service, api_key)
372
 
373
+ actual_content_slides = num_slides
374
+ total_slides = actual_content_slides + 3 # Content slides + quiz, assignment, thank-you
375
+
376
  research_agent = AssistantAgent(
377
  name="research_agent",
378
  model_client=model_client,
 
385
  model_client=model_client,
386
  handoffs=["script_agent"],
387
  system_message=f"""
388
+ You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({actual_content_slides}), generate exactly {actual_content_slides} content slides, plus one quiz slide, one assignment slide, and one thank-you slide, for a total of {total_slides} slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. Do not include any explanatory text, comments, or other messages. Ensure the JSON is valid and contains exactly {total_slides} slides before proceeding. After outputting the JSON, use the handoff_to_script_agent tool to pass the task to the Script Agent.
389
+ Example output for 2 content slides:
390
  ```json
391
  [
392
  {{"title": "Slide 1", "content": "Content for slide 1"}},
 
404
  model_client=model_client,
405
  handoffs=["feynman_agent"],
406
  system_message=f"""
407
+ You are a Script Agent. Access the JSON array of {total_slides} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {total_slides} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
408
+ Example for 3 content slides:
409
  ```json
410
  [
411
  "So, this slide, um, covers the main topic in a fun way.",
412
+ "The second slide introduces the key concepts.",
413
+ "This third slide shows some interesting applications.",
414
  "Alright, you know, answer these quiz questions.",
415
  "Here's your, like, assignment to complete.",
416
  "Thanks for, um, attending today!"
 
424
  model_client=model_client,
425
  handoffs=[],
426
  system_message=f"""
427
+ You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {total_slides} slides and {total_slides} scripts are received. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing, invalid, or do not match the expected count ({total_slides}), report the issue clearly. Use 'TERMINATE' to signal completion.
428
+ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is coherent. TERMINATE'
429
  """)
430
 
431
  swarm = Swarm(
 
443
  Topic: {topic}
444
  Additional Instructions: {instructions}
445
  Audience: {lecture_type}
446
+ Number of Content Slides: {actual_content_slides}
447
  Please start by researching the topic.
448
  """
449
+ logger.info("Starting lecture generation for topic: %s with %d content slides", topic, actual_content_slides)
450
 
451
  slides = None
452
  scripts = None
 
458
  """
459
 
460
  try:
461
+ logger.info("Research Agent starting...")
462
+ task_result = await Console(swarm.run_stream(task=initial_message))
463
+ logger.info("Swarm execution completed")
464
+
465
  slide_retry_count = 0
466
+ script_retry_count = 0
467
+ max_retries = 2
468
 
469
+ for message in task_result.messages:
470
+ source = getattr(message, 'source', getattr(message, 'sender', None))
471
+ logger.debug("Processing message from %s, type: %s", source, type(message))
 
472
 
473
+ if isinstance(message, HandoffMessage):
474
+ logger.info("Handoff from %s to %s", source, message.target)
475
+ if source == "research_agent" and message.target == "slide_agent":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
476
  progress = 25
477
  label = "Slides: generating..."
478
  yield html_with_progress(label, progress)
479
  await asyncio.sleep(0.1)
480
+ elif source == "slide_agent" and message.target == "script_agent":
481
+ if slides is None:
482
+ logger.warning("Slide Agent handoff without slides JSON")
483
+ extracted_json = extract_json_from_message(message)
484
+ if extracted_json:
485
+ slides = extracted_json
486
+ logger.info("Extracted slides JSON from HandoffMessage context: %s", slides)
487
+ if slides is None or len(slides) != total_slides:
488
+ if slide_retry_count < max_retries:
489
+ slide_retry_count += 1
490
+ logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
491
  retry_message = TextMessage(
492
+ content=f"Please generate exactly {total_slides} slides ({actual_content_slides} content slides plus quiz, assignment, thank-you) as per your instructions.",
493
  source="user",
494
+ recipient="slide_agent"
495
  )
496
  task_result.messages.append(retry_message)
497
  continue
498
+ progress = 50
499
+ label = "Scripts: generating..."
 
 
 
500
  yield html_with_progress(label, progress)
501
  await asyncio.sleep(0.1)
502
+ elif source == "script_agent" and message.target == "feynman_agent":
503
+ if scripts is None:
504
+ logger.warning("Script Agent handoff without scripts JSON")
505
+ extracted_json = extract_json_from_message(message)
506
+ if extracted_json:
507
+ scripts = extracted_json
508
+ logger.info("Extracted scripts JSON from HandoffMessage context: %s", scripts)
509
+ progress = 75
510
+ label = "Review: in progress..."
511
+ yield html_with_progress(label, progress)
512
+ await asyncio.sleep(0.1)
513
+
514
+ elif source == "research_agent" and isinstance(message, TextMessage) and "handoff_to_slide_agent" in message.content:
515
+ logger.info("Research Agent completed research")
516
+ progress = 25
517
+ label = "Slides: generating..."
518
+ yield html_with_progress(label, progress)
519
+ await asyncio.sleep(0.1)
520
 
521
+ elif source == "slide_agent" and isinstance(message, (TextMessage, StructuredMessage)):
522
+ logger.debug("Slide Agent message received")
523
+ extracted_json = extract_json_from_message(message)
524
+ if extracted_json:
525
+ slides = extracted_json
526
+ logger.info("Slide Agent generated %d slides", len(slides))
527
+ if len(slides) != total_slides:
528
+ if slide_retry_count < max_retries:
529
+ slide_retry_count += 1
530
+ logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
531
+ retry_message = TextMessage(
532
+ content=f"Please generate exactly {total_slides} slides ({actual_content_slides} content slides plus quiz, assignment, thank-you) as per your instructions.",
533
+ source="user",
534
+ recipient="slide_agent"
535
+ )
536
+ task_result.messages.append(retry_message)
537
+ continue
538
+ # Save slide content to individual files
539
+ for i, slide in enumerate(slides):
540
+ content_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_content.txt")
541
+ try:
542
+ with open(content_file, "w", encoding="utf-8") as f:
543
+ f.write(slide["content"])
544
+ logger.info("Saved slide content to %s", content_file)
545
+ except Exception as e:
546
+ logger.error("Error saving slide content to %s: %s", content_file, str(e))
547
+ progress = 50
548
+ label = "Scripts: generating..."
549
+ yield html_with_progress(label, progress)
550
+ await asyncio.sleep(0.1)
551
  else:
552
+ logger.warning("No JSON extracted from slide_agent message")
553
+ if slide_retry_count < max_retries:
554
+ slide_retry_count += 1
555
+ logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
556
+ retry_message = TextMessage(
557
+ content=f"Please generate exactly {total_slides} slides ({actual_content_slides} content slides plus quiz, assignment, thank-you) as per your instructions.",
558
+ source="user",
559
+ recipient="slide_agent"
560
+ )
561
+ task_result.messages.append(retry_message)
562
+ continue
563
+
564
+ elif source == "script_agent" and isinstance(message, (TextMessage, StructuredMessage)):
565
+ logger.debug("Script Agent message received")
566
+ extracted_json = extract_json_from_message(message)
567
+ if extracted_json:
568
+ scripts = extracted_json
569
+ logger.info("Script Agent generated scripts for %d slides", len(scripts))
570
+ # Save raw scripts to individual files
571
+ for i, script in enumerate(scripts):
572
+ script_file = os.path.join(OUTPUT_DIR, f"slide_{i+1}_raw_script.txt")
573
+ try:
574
+ with open(script_file, "w", encoding="utf-8") as f:
575
+ f.write(script)
576
+ logger.info("Saved raw script to %s", script_file)
577
+ except Exception as e:
578
+ logger.error("Error saving raw script to %s: %s", script_file, str(e))
579
+ progress = 75
580
+ label = "Scripts generated and saved. Reviewing..."
581
+ yield html_with_progress(label, progress)
582
+ await asyncio.sleep(0.1)
583
+ else:
584
+ logger.warning("No JSON extracted from script_agent message")
585
+ if script_retry_count < max_retries:
586
+ script_retry_count += 1
587
+ logger.info("Retrying script generation (attempt %d/%d)", script_retry_count, max_retries)
588
+ retry_message = TextMessage(
589
+ content=f"Please generate exactly {total_slides} scripts for the {total_slides} slides as per your instructions.",
590
+ source="user",
591
+ recipient="script_agent"
592
+ )
593
+ task_result.messages.append(retry_message)
594
+ continue
595
+
596
+ elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
597
+ logger.info("Feynman Agent completed lecture review: %s", message.content)
598
+ progress = 90
599
+ label = "Lecture materials ready. Generating audio..."
600
+ yield html_with_progress(label, progress)
601
+ await asyncio.sleep(0.1)
602
 
603
  logger.info("Slides state: %s", "Generated" if slides else "None")
604
  logger.info("Scripts state: %s", "Generated" if scripts else "None")
 
610
  for msg in task_result.messages:
611
  source = getattr(msg, 'source', getattr(msg, 'sender', None))
612
  logger.debug("Message from %s, type: %s, content: %s", source, type(msg), msg.to_text() if hasattr(msg, 'to_text') else str(msg))
613
+ yield f"""
614
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
615
+ <h2 style="color: #d9534f;">{error_message}</h2>
616
+ <p style="margin-top: 20px;">Please try again with a different model (e.g., Anthropic-claude-3-sonnet-20240229) or simplify the topic/instructions.</p>
617
+ </div>
618
+ """
619
+ return
620
+
621
+ if len(slides) != total_slides:
622
+ logger.error("Expected %d slides (including %d content slides + 3), but received %d", total_slides, actual_content_slides, len(slides))
623
+ yield f"""
624
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
625
+ <h2 style="color: #d9534f;">Incorrect number of slides</h2>
626
+ <p style="margin-top: 20px;">Expected {total_slides} slides ({actual_content_slides} content slides + quiz, assignment, thank-you), but generated {len(slides)}. Please try again.</p>
627
+ </div>
628
+ """
629
  return
630
 
631
  if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
 
638
  """
639
  return
640
 
641
+ if len(scripts) != total_slides:
642
  logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
643
  yield f"""
644
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
 
648
  """
649
  return
650
 
651
+ # Generate HTML slides
652
+ slides_html = generate_html_slides(slides, title)
653
+ if not slides_html:
654
+ logger.error("Failed to generate HTML slides")
 
655
  yield f"""
656
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
657
+ <h2 style="color: #d9534f;">Failed to generate slides</h2>
658
+ <p style="margin-top: 20px;">Please try again.</p>
 
659
  </div>
660
  """
661
  return
662
 
663
  audio_files = []
 
664
  validated_speaker_wav = await validate_and_convert_speaker_audio(speaker_audio)
665
  if not validated_speaker_wav:
666
  logger.error("Invalid speaker audio after conversion, skipping TTS")
 
695
  await asyncio.sleep(0.1)
696
  continue
697
 
698
+ max_audio_retries = 2
699
+ for attempt in range(max_audio_retries + 1):
700
  try:
701
  current_text = cleaned_script
702
  if attempt > 0:
 
718
  break
719
  except Exception as e:
720
  logger.error("Error generating audio for slide %d (attempt %d): %s\n%s", i + 1, attempt, str(e), traceback.format_exc())
721
+ if attempt == max_audio_retries:
722
  logger.error("Max retries reached for slide %d, skipping", i + 1)
723
  audio_files.append(None)
724
  progress = 90 + ((i + 1) / len(scripts)) * 10
 
727
  await asyncio.sleep(0.1)
728
  break
729
 
 
730
  slides_info = json.dumps({"slides": [
731
  {"title": slide["title"], "content": slide["content"]}
732
  for slide in slides
733
  ], "audioFiles": audio_files})
734
 
735
  html_output = f"""
736
+ <div id="lecture-container" style="height: 700px; border: 1px solid #ddd; border-radius: 8px; display: flex; flex-direction: column; justify-content: space-between;">
737
+ <div id="slide-content" style="flex: 1; overflow: auto;">
738
+ {slides_html}
 
 
 
 
 
739
  </div>
740
  <div style="padding: 20px;">
741
  <div id="progress-bar" style="width: 100%; height: 5px; background-color: #ddd; border-radius: 2px; margin-bottom: 10px;">
 
760
 
761
  for (let i = 0; i < totalSlides; i++) {{
762
  if (lectureData.audioFiles && lectureData.audioFiles[i]) {{
763
+ const audio = new Audio(lectureData.audioFiles[i]);
764
  audioElements.push(audio);
765
  }} else {{
766
  audioElements.push(null);
 
768
  }}
769
 
770
  function updateSlide() {{
771
+ for (let i = 1; i <= totalSlides; i++) {{
772
+ document.getElementById(`slide-${{i}}`).style.display = (i - 1 === currentSlide) ? 'block' : 'none';
773
+ }}
774
  slideCounter.textContent = `Slide ${{currentSlide + 1}} of ${{totalSlides}}`;
775
  progressFill.style.width = `${{(currentSlide + 1) / totalSlides * 100}}%`;
776
 
 
819
  }});
820
  }}
821
  }});
822
+
823
+ // Initialize first slide
824
+ updateSlide();
825
  </script>
826
  """
827
  logger.info("Lecture generation completed successfully")
 
833
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
834
  <h2 style="color: #d9534f;">Error during lecture generation</h2>
835
  <p style="margin-top: 10px; font-size: 16px;">{str(e)}</p>
836
+ <p style="margin-top: 20px;">Please try again or adjust your inputs.</p>
837
  </div>
838
  """
839
  return