Jaward commited on
Commit
b4d8cf2
·
verified ·
1 Parent(s): a9b14fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -1029
app.py CHANGED
@@ -1,6 +1,5 @@
1
- # Lectūra Research Demo: A Multi-Agent Tool for Self-taught Mastery.
2
- # Author: Jaward Sesay
3
- # License: All rights reserved.
4
  import os
5
  import json
6
  import re
@@ -29,19 +28,6 @@ import tempfile
29
  from pydub import AudioSegment
30
  from TTS.api import TTS
31
  import markdown
32
- import PyPDF2
33
- import io
34
- import copy
35
-
36
- def get_instructor_name(speaker):
37
- instructor_names = {
38
- "feynman.mp3": "Professor Richard Feynman",
39
- "einstein.mp3": "Professor Albert Einstein",
40
- "samantha.mp3": "Professor Samantha",
41
- "socrates.mp3": "Professor Socrates",
42
- "professor_lectura_male.mp3": "Professor Lectūra"
43
- }
44
- return instructor_names.get(speaker, "Professor Lectūra")
45
 
46
  # Set up logging
47
  logging.basicConfig(
@@ -56,11 +42,8 @@ logger = logging.getLogger(__name__)
56
 
57
  # Set up environment
58
  OUTPUT_DIR = os.path.join(os.getcwd(), "outputs")
59
- UPLOAD_DIR = os.path.join(os.getcwd(), "uploads")
60
  os.makedirs(OUTPUT_DIR, exist_ok=True)
61
- os.makedirs(UPLOAD_DIR, exist_ok=True)
62
  logger.info(f"Using output directory: {OUTPUT_DIR}")
63
- logger.info(f"Using upload directory: {UPLOAD_DIR}")
64
  os.environ["COQUI_TOS_AGREED"] = "1"
65
 
66
  # Initialize TTS model
@@ -121,7 +104,7 @@ def render_md_to_html(md_content: str) -> str:
121
  return "<div>Error rendering content</div>"
122
 
123
  # Slide tool for generating HTML slides used by slide_agent
124
- def create_slides(slides: list[dict], title: str, instructor_name: str, output_dir: str = OUTPUT_DIR) -> list[str]:
125
  try:
126
  html_files = []
127
  template_file = os.path.join(os.getcwd(), "slide_template.html")
@@ -139,7 +122,7 @@ def create_slides(slides: list[dict], title: str, instructor_name: str, output_d
139
  slide_html = slide_html.replace("section title", f"{slide['title']}")
140
  slide_html = slide_html.replace("Lecture title", title)
141
  slide_html = slide_html.replace("<!--CONTENT-->", html_content)
142
- slide_html = slide_html.replace("speaker name", instructor_name)
143
  slide_html = slide_html.replace("date", date)
144
 
145
  html_file = os.path.join(output_dir, f"slide_{slide_number}.html")
@@ -169,7 +152,7 @@ def html_with_progress(label, progress):
169
  <div style="width: 70%; background-color: lightgrey; border-radius: 80px; overflow: hidden; margin-bottom: 20px;">
170
  <div style="width: {progress}%; height: 15px; background-color: #4CAF50; border-radius: 80px;"></div>
171
  </div>
172
- <h2 style="font-style: italic; color: #555 !important;">{label}</h2>
173
  </div>
174
  """
175
 
@@ -222,7 +205,7 @@ def clean_script_text(script):
222
  async def validate_and_convert_speaker_audio(speaker_audio):
223
  if not speaker_audio or not os.path.exists(speaker_audio):
224
  logger.warning("Speaker audio file does not exist: %s. Using default voice.", speaker_audio)
225
- default_voice = os.path.join(os.path.dirname(__file__), "professor_lectura_male.mp3")
226
  if os.path.exists(default_voice):
227
  speaker_audio = default_voice
228
  else:
@@ -407,13 +390,6 @@ def get_gradio_file_url(local_path):
407
  # Async generate lecture materials and audio
408
  async def on_generate(api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, lecture_style, speaker_audio, num_slides):
409
  model_client = get_model_client(api_service, api_key)
410
-
411
- # Get the speaker from the speaker_audio path
412
- speaker = os.path.basename(speaker_audio) if speaker_audio else "professor_lectura_male.mp3"
413
- logger.info(f"Selected speaker file: {speaker}")
414
-
415
- instructor_name = get_instructor_name(speaker)
416
- logger.info(f"Using instructor: {instructor_name}")
417
 
418
  if os.path.exists(OUTPUT_DIR):
419
  try:
@@ -449,7 +425,7 @@ async def on_generate(api_service, api_key, serpapi_key, title, lecture_content_
449
  system_message=f"""
450
  You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({content_slides}), generate exactly {content_slides} content slides, plus an Introduction slide as the first slide and a Closing slide as the last slide, making a total of {total_slides} slides.
451
 
452
- - The Introduction slide (first slide) should have the title "{title}" and content containing only the lecture title, speaker name ({get_instructor_name(speaker_audio)}), and date {date}, centered, in plain text.
453
  - The Closing slide (last slide) should have the title "Closing" and content containing only "The End\nThank you", centered, in plain text.
454
  - The remaining {content_slides} slides should be content slides based on the lecture description, audience type, and lecture style ({lecture_style}), with meaningful titles and content in valid Markdown format. Adapt the content to the lecture style to suit diverse learners:
455
  - Feynman: Explains complex ideas with simplicity, clarity, and enthusiasm, emulating Richard Feynman's teaching style.
@@ -463,7 +439,7 @@ Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each
463
  Example output for 1 content slide (total 3 slides):
464
  ```json
465
  [
466
- {{"title": "Introduction to AI Basics", "content": "AI Basics\n{get_instructor_name(speaker_audio)}\n{date}"}},
467
  {{"title": "What is AI?", "content": "# What is AI?\n- Definition: Systems that mimic human intelligence\n- Key areas: ML, NLP, Robotics"}},
468
  {{"title": "Closing", "content": "The End\nThank you"}}
469
  ]
@@ -475,16 +451,22 @@ Example output for 1 content slide (total 3 slides):
475
  script_agent = AssistantAgent(
476
  name="script_agent",
477
  model_client=model_client,
478
- handoffs=["instructor_agent"],
479
  system_message=f"""
480
- You are a Script Agent. Access the JSON array of {total_slides} slides from the conversation history, which includes an Introduction slide, {content_slides} content slides, and a Closing slide. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a clear, academically inclined tone. Ensure the lecture is engaging, covers the fundamental requirements of the topic, and aligns with the lecture style ({lecture_style}) to suit diverse learners. The lecture will be delivered by {instructor_name}.
 
 
 
 
 
 
481
 
482
- Output ONLY a JSON array wrapped in ```json ... ``` with exactly {total_slides} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_instructor_agent tool. If scripts cannot be generated, retry once.
483
 
484
  Example for 3 slides (1 content slide):
485
  ```json
486
  [
487
- "Welcome to the lecture on AI Basics. I am {instructor_name}, and today we will explore the fundamentals of artificial intelligence.",
488
  "Let us begin by defining artificial intelligence: it refers to systems that mimic human intelligence, spanning key areas such as machine learning, natural language processing, and robotics.",
489
  "That concludes our lecture on AI Basics. Thank you for your attention, and I hope you found this session insightful."
490
  ]
@@ -492,72 +474,17 @@ Example for 3 slides (1 content slide):
492
  output_content_type=None,
493
  reflect_on_tool_use=False
494
  )
495
-
496
- def get_instructor_prompt(speaker, lecture_style):
497
- base_prompts = {
498
- "feynman.mp3": f"You are {instructor_name}, known for your ability to explain complex concepts with remarkable clarity and enthusiasm. Your teaching style is characterized by:",
499
- "einstein.mp3": f"You are {instructor_name}, known for your profound insights and ability to connect abstract concepts to the physical world. Your teaching style is characterized by:",
500
- "samantha.mp3": f"You are {instructor_name}, known for your engaging and accessible approach to teaching. Your teaching style is characterized by:",
501
- "socrates.mp3": f"You are {instructor_name}, known for your method of questioning and guiding students to discover knowledge themselves. Your teaching style is characterized by:",
502
- "professor_lectura_male.mp3": f"You are {instructor_name}, known for your clear and authoritative teaching style. Your teaching style is characterized by:"
503
- }
504
-
505
- style_characteristics = {
506
- "Feynman - Simplifies complex ideas with enthusiasm": """
507
- - Breaking down complex ideas into simple, understandable parts
508
- - Using analogies and real-world examples
509
- - Maintaining enthusiasm and curiosity throughout
510
- - Encouraging critical thinking and questioning
511
- - Making abstract concepts tangible and relatable""",
512
-
513
- "Socratic - Guides insights with probing questions": """
514
- - Using thought-provoking questions to guide understanding
515
- - Encouraging self-discovery and critical thinking
516
- - Challenging assumptions and exploring implications
517
- - Building knowledge through dialogue and inquiry
518
- - Fostering intellectual curiosity and reflection""",
519
-
520
- "Inspirational - Sparks enthusiasm with visionary ideas": """
521
- - Connecting concepts to broader implications and possibilities
522
- - Using motivational language and visionary thinking
523
- - Inspiring curiosity and wonder about the subject
524
- - Highlighting the transformative potential of knowledge
525
- - Encouraging students to think beyond conventional boundaries""",
526
-
527
- "Reflective - Promotes introspection with a calm tone": """
528
- - Creating a contemplative learning environment
529
- - Encouraging deep thinking and personal connection
530
- - Using a calm, measured delivery
531
- - Promoting self-reflection and understanding
532
- - Building connections between concepts and personal experience""",
533
-
534
- "Humorous - Uses wit and anecdotes for engaging content": """
535
- - Incorporating relevant humor and anecdotes
536
- - Making learning enjoyable and memorable
537
- - Using wit to highlight key concepts
538
- - Creating an engaging and relaxed atmosphere
539
- - Balancing entertainment with educational value"""
540
- }
541
-
542
- base_prompt = base_prompts.get(speaker, base_prompts["feynman.mp3"])
543
- style_prompt = style_characteristics.get(lecture_style, style_characteristics["Feynman - Simplifies complex ideas with enthusiasm"])
544
-
545
- return f"""{base_prompt}
546
- {style_prompt}
547
-
548
- Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {total_slides} slides and {total_slides} scripts are received, including the Introduction and Closing slides. Verify that HTML slide files exist in the outputs directory and align with the lecture style ({lecture_style}). Output a confirmation message summarizing the number of slides, scripts, and HTML files status. If slides, scripts, or HTML files are missing, invalid, or do not match the expected count ({total_slides}), report the issue clearly. Use 'TERMINATE' to signal completion.
549
- Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files. Lecture is coherent and aligns with {lecture_style} style. TERMINATE'
550
- """
551
-
552
- instructor_agent = AssistantAgent(
553
- name="instructor_agent",
554
  model_client=model_client,
555
  handoffs=[],
556
- system_message=get_instructor_prompt(speaker_audio, lecture_style)
557
- )
 
 
558
 
559
  swarm = Swarm(
560
- participants=[research_agent, slide_agent, script_agent, instructor_agent],
561
  termination_condition=HandoffTermination(target="user") | TextMentionTermination("TERMINATE")
562
  )
563
 
@@ -641,7 +568,7 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
641
  []
642
  )
643
  await asyncio.sleep(0.1)
644
- elif source == "script_agent" and message.target == "instructor_agent":
645
  if scripts is None:
646
  logger.warning("Script Agent handoff without scripts JSON")
647
  extracted_json = extract_json_from_message(message)
@@ -683,8 +610,8 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
683
  )
684
  task_result.messages.append(retry_message)
685
  continue
686
- # Generate HTML slides with instructor name
687
- html_files = create_slides(slides, title, instructor_name)
688
  if not html_files:
689
  logger.error("Failed to generate HTML slides")
690
  progress = 50
@@ -741,8 +668,8 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
741
  task_result.messages.append(retry_message)
742
  continue
743
 
744
- elif source == "instructor_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
745
- logger.info("Instructor Agent completed lecture review: %s", message.content)
746
  progress = 90
747
  label = "Lecture materials ready. Generating lecture speech..."
748
  file_paths = [f for f in os.listdir(OUTPUT_DIR) if f.endswith(('.md', '.txt'))]
@@ -827,13 +754,12 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
827
  logger.error("Invalid speaker audio after conversion, skipping TTS")
828
  yield (
829
  f"""
830
- <div style=\"display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;\">
831
- <h2 style=\"color: #d9534f;\">Invalid speaker audio</h2>
832
- <p style=\"margin-top: 20px;\">Please upload a valid MP3 or WAV audio file and try again.</p>
833
  </div>
834
  """,
835
- [],
836
- None
837
  )
838
  return
839
 
@@ -858,8 +784,7 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
858
  label = f"Generating lecture speech for slide {i + 1}/{len(scripts)}..."
859
  yield (
860
  html_with_progress(label, progress),
861
- file_paths,
862
- None
863
  )
864
  await asyncio.sleep(0.1)
865
  continue
@@ -886,8 +811,7 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
886
  file_paths.append(audio_file)
887
  yield (
888
  html_with_progress(label, progress),
889
- file_paths,
890
- None
891
  )
892
  await asyncio.sleep(0.1)
893
  break
@@ -901,8 +825,7 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
901
  label = f"Generating speech for slide {i + 1}/{len(scripts)}..."
902
  yield (
903
  html_with_progress(label, progress),
904
- file_paths,
905
- None
906
  )
907
  await asyncio.sleep(0.1)
908
  break
@@ -931,29 +854,18 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
931
  {audio_timeline}
932
  </div>
933
  <div style="display: center; justify-content: center; margin-bottom: 10px;">
934
- <button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: black"><i class="fas fa-step-backward" style="color: #fff !important"></i></button>
935
- <button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: black"><i class="fas fa-play" style="color: #fff !important"></i></button>
936
- <button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: black"><i class="fas fa-step-forward" style="color: #fff !important"></i></button>
937
- <button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: black"><i style="color: #fff !important" class="fas fa-expand"></i></button>
938
- <button id="clear-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: black"><i style="color: #fff !important" class="fas fa-paint-brush"></i></button>
939
  </div>
940
  </div>
941
  </div>
942
  """
943
  logger.info("Yielding final lecture materials after audio generation")
944
- # --- YIELD LECTURE CONTEXT FOR AGENTS ---
945
- lecture_context = {
946
- "slides": slides,
947
- "scripts": scripts,
948
- "title": title,
949
- "description": lecture_content_description,
950
- "style": lecture_style,
951
- "audience": lecture_type
952
- }
953
  yield (
954
  html_output,
955
- file_paths,
956
- lecture_context
957
  )
958
 
959
  logger.info("Lecture generation completed successfully")
@@ -968,21 +880,14 @@ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files
968
  <p style="margin-top: 20px;">Please try again</p>
969
  </div>
970
  """,
971
- [],
972
- None
973
  )
974
  return
975
 
976
- # custom js
977
  js_code = """
978
  () => {
979
  // Function to wait for an element to appear in the DOM
980
- window.addEventListener('load', function () {
981
- gradioURL = window.location.href
982
- if (!gradioURL.endsWith('?__theme=light')) {
983
- window.location.replace(gradioURL + '?__theme=light');
984
- }
985
- });
986
  function waitForElement(selector, callback, maxAttempts = 50, interval = 100) {
987
  let attempts = 0;
988
  const intervalId = setInterval(() => {
@@ -1027,8 +932,7 @@ window.addEventListener('load', function () {
1027
  const totalSlides = lectureData.htmlFiles.length;
1028
  let audioElements = [];
1029
  let isPlaying = false;
1030
- let hasNavigated = false;
1031
- let currentAudioIndex = 0;
1032
 
1033
  // Wait for slide-content element
1034
  waitForElement('#slide-content', (slideContent) => {
@@ -1060,38 +964,15 @@ window.addEventListener('load', function () {
1060
  if (body) {
1061
  const textLength = body.textContent.length;
1062
  const screenWidth = window.innerWidth;
1063
- const screenHeight = window.innerHeight;
1064
-
1065
- // Base font size calculation
1066
- let baseFontSize;
1067
- if (screenWidth >= 1920) {
1068
- baseFontSize = 20; // Large screens
1069
- } else if (screenWidth >= 1366) {
1070
- baseFontSize = 18; // Medium screens
1071
- } else {
1072
- baseFontSize = 16; // Small screens
1073
- }
1074
-
1075
- // Adjust based on content length
1076
- let adjustedFontSize;
1077
- if (textLength > 1000) {
1078
- adjustedFontSize = baseFontSize * 0.8; // Reduce for long content
1079
- } else if (textLength > 500) {
1080
- adjustedFontSize = baseFontSize * 0.9; // Slightly reduce for medium content
1081
- } else {
1082
- adjustedFontSize = baseFontSize; // Keep base size for short content
1083
- }
1084
-
1085
- // Ensure minimum and maximum sizes
1086
- adjustedFontSize = Math.max(14, Math.min(24, adjustedFontSize));
1087
-
1088
- // Apply to all elements
1089
  const elements = body.getElementsByTagName('*');
1090
  for (let elem of elements) {
1091
  elem.style.fontSize = `${adjustedFontSize}px`;
1092
  }
1093
-
1094
- console.log(`Adjusted font size to ${adjustedFontSize}px for ${textLength} characters on ${screenWidth}x${screenHeight} screen`);
1095
  }
1096
  };
1097
  });
@@ -1115,7 +996,7 @@ window.addEventListener('load', function () {
1115
  if (audio && audio.pause) {
1116
  audio.pause();
1117
  audio.currentTime = 0;
1118
- audio.style.border = 'none';
1119
  console.log("Paused and reset audio:", audio.id);
1120
  }
1121
  });
@@ -1141,7 +1022,7 @@ window.addEventListener('load', function () {
1141
 
1142
  function prevSlide() {
1143
  console.log("Previous button clicked, current slide:", currentSlide);
1144
- hasNavigated = true;
1145
  if (currentSlide > 0) {
1146
  currentSlide--;
1147
  updateSlide(() => {
@@ -1159,7 +1040,7 @@ window.addEventListener('load', function () {
1159
 
1160
  function nextSlide() {
1161
  console.log("Next button clicked, current slide:", currentSlide);
1162
- hasNavigated = true;
1163
  if (currentSlide < totalSlides - 1) {
1164
  currentSlide++;
1165
  updateSlide(() => {
@@ -1183,13 +1064,13 @@ window.addEventListener('load', function () {
1183
  return;
1184
  }
1185
  const playIcon = playBtn.querySelector('i');
1186
-
1187
- if (isPlaying) {
1188
  // Pause playback
1189
  isPlaying = false;
1190
  audioElements.forEach(audio => {
1191
  if (audio && audio.pause) {
1192
  audio.pause();
 
1193
  audio.style.border = 'none';
1194
  console.log("Paused audio:", audio.id);
1195
  }
@@ -1197,16 +1078,14 @@ window.addEventListener('load', function () {
1197
  playIcon.className = 'fas fa-play';
1198
  return;
1199
  }
1200
-
1201
  // Start playback
 
 
1202
  isPlaying = true;
1203
  playIcon.className = 'fas fa-pause';
1204
- currentSlide = 0;
1205
- currentAudioIndex = 0;
1206
-
1207
  updateSlide(() => {
1208
  function playNext() {
1209
- if (currentAudioIndex >= totalSlides || !isPlaying) {
1210
  isPlaying = false;
1211
  playIcon.className = 'fas fa-play';
1212
  audioElements.forEach(audio => {
@@ -1215,64 +1094,72 @@ window.addEventListener('load', function () {
1215
  console.log("Finished playing all slides or paused");
1216
  return;
1217
  }
1218
-
1219
- currentSlide = currentAudioIndex;
1220
  updateSlide(() => {
1221
- const audio = audioElements[currentAudioIndex];
1222
  if (audio && audio.play) {
 
1223
  audioElements.forEach(a => a.style.border = 'none');
1224
  audio.style.border = '5px solid #16cd16';
1225
  audio.style.borderRadius = '30px';
1226
- console.log(`Attempting to play audio for slide ${currentAudioIndex + 1}`);
1227
-
1228
  audio.play().then(() => {
1229
- console.log(`Playing audio for slide ${currentAudioIndex + 1}`);
 
1230
  audio.onended = null;
1231
  audio.addEventListener('ended', () => {
1232
- if (isPlaying) {
1233
- console.log(`Audio ended for slide ${currentAudioIndex + 1}`);
1234
- currentAudioIndex++;
1235
- playNext();
1236
- }
1237
  }, { once: true });
1238
-
1239
  const checkDuration = setInterval(() => {
1240
  if (!isPlaying) {
1241
  clearInterval(checkDuration);
1242
  return;
1243
  }
1244
  if (audio.duration && audio.currentTime >= audio.duration - 0.1) {
1245
- console.log(`Fallback: Audio for slide ${currentAudioIndex + 1} considered ended`);
1246
  clearInterval(checkDuration);
1247
- audio.onended = null;
1248
- currentAudioIndex++;
1249
  playNext();
1250
  }
1251
  }, 1000);
1252
  }).catch(e => {
1253
- console.error(`Audio play failed for slide ${currentAudioIndex + 1}:`, e);
 
1254
  setTimeout(() => {
1255
- if (isPlaying) {
1256
- audio.play().then(() => {
1257
- console.log(`Retry succeeded for slide ${currentAudioIndex + 1}`);
1258
- audio.onended = null;
1259
- audio.addEventListener('ended', () => {
1260
- if (isPlaying) {
1261
- console.log(`Audio ended for slide ${currentAudioIndex + 1}`);
1262
- currentAudioIndex++;
1263
- playNext();
1264
- }
1265
- }, { once: true });
1266
- }).catch(e => {
1267
- console.error(`Retry failed for slide ${currentAudioIndex + 1}:`, e);
1268
- currentAudioIndex++;
1269
  playNext();
1270
- });
1271
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1272
  }, 500);
1273
  });
1274
  } else {
1275
- currentAudioIndex++;
1276
  playNext();
1277
  }
1278
  });
@@ -1334,197 +1221,40 @@ window.addEventListener('load', function () {
1334
  const lectureContainer = document.getElementById('lecture-container');
1335
  if (lectureContainer) {
1336
  console.log("Lecture container detected in DOM");
1337
- observer.disconnect();
1338
  initializeSlides();
1339
  }
1340
  }
1341
  });
1342
  });
 
 
1343
  observer.observe(document.body, { childList: true, subtree: true });
1344
  console.log("Started observing DOM for lecture container");
1345
  }
1346
  """
1347
 
1348
- def process_uploaded_file(file):
1349
- """Process uploaded file and extract text content."""
1350
- try:
1351
- # Determine if file is a NamedString (Gradio string-like object) or file-like object
1352
- file_name = os.path.basename(file.name if hasattr(file, 'name') else str(file))
1353
- file_path = os.path.join(UPLOAD_DIR, file_name)
1354
-
1355
- # Get file extension
1356
- _, ext = os.path.splitext(file_path)
1357
- ext = ext.lower()
1358
-
1359
- # Handle PDF files differently
1360
- if ext == '.pdf':
1361
- # For PDF files, write the raw bytes
1362
- if hasattr(file, 'read'):
1363
- with open(file_path, 'wb') as f:
1364
- f.write(file.read())
1365
- else:
1366
- # If it's a file path, copy the file
1367
- shutil.copy2(str(file), file_path)
1368
-
1369
- # Process PDF file
1370
- pdf_reader = PyPDF2.PdfReader(file_path)
1371
- text = ""
1372
- for page in pdf_reader.pages:
1373
- text += page.extract_text() + "\n"
1374
- logger.info("Extracted text from PDF: %s", file_path)
1375
- return text
1376
-
1377
- # Handle text files
1378
- elif ext in ('.txt', '.md'):
1379
- # Read content and save to UPLOAD_DIR
1380
- if hasattr(file, 'read'): # File-like object
1381
- content = file.read()
1382
- if isinstance(content, bytes):
1383
- content = content.decode('utf-8', errors='replace')
1384
- with open(file_path, 'w', encoding='utf-8') as f:
1385
- f.write(content)
1386
- else: # NamedString or string-like
1387
- # If it's a file path, read the file
1388
- if os.path.exists(str(file)):
1389
- with open(str(file), 'r', encoding='utf-8') as f:
1390
- content = f.read()
1391
- else:
1392
- content = str(file)
1393
- with open(file_path, 'w', encoding='utf-8') as f:
1394
- f.write(content)
1395
-
1396
- # Clean and return content
1397
- cleaned_content = clean_script_text(content)
1398
- logger.info("Cleaned content for %s: %s", file_path, cleaned_content[:100] + "..." if len(cleaned_content) > 100 else cleaned_content)
1399
- return cleaned_content
1400
- else:
1401
- raise ValueError(f"Unsupported file format: {ext}")
1402
- except Exception as e:
1403
- logger.error(f"Error processing file {file_path}: {str(e)}")
1404
- raise
1405
-
1406
- async def study_mode_process(file, api_service, api_key):
1407
- """Process uploaded file in study mode."""
1408
- max_retries = 1
1409
- for attempt in range(max_retries + 1):
1410
- try:
1411
- # Extract text from file
1412
- content = process_uploaded_file(file)
1413
- logger.info("Successfully extracted content from file: %s", file)
1414
-
1415
- # Create study agent
1416
- logger.info("Initializing model client for service: %s", api_service)
1417
- model_client = get_model_client(api_service, api_key)
1418
- logger.info("Model client initialized successfully")
1419
-
1420
- study_agent = AssistantAgent(
1421
- name="study_agent",
1422
- model_client=model_client,
1423
- system_message="""You are a Study Agent that analyzes lecture materials and generates appropriate inputs for the lecture generation system.
1424
- Analyze the provided content and generate:
1425
- 1. A concise title (max 10 words)
1426
- 2. A brief content description (max 20 words)
1427
-
1428
- Output the results in JSON format:
1429
- {
1430
- "title": "string",
1431
- "content_description": "string"
1432
- }"""
1433
- )
1434
-
1435
- # Process content with study agent
1436
- logger.info("Running study agent with content length: %d", len(content))
1437
- task_result = await Console(study_agent.run_stream(task=content))
1438
- logger.info("Study agent execution completed")
1439
-
1440
- for message in task_result.messages:
1441
- extracted_json = extract_json_from_message(message)
1442
- if extracted_json and isinstance(extracted_json, dict):
1443
- if "title" in extracted_json and "content_description" in extracted_json:
1444
- logger.info("Valid JSON output: %s", extracted_json)
1445
- return extracted_json
1446
- else:
1447
- logger.warning("Incomplete JSON output: %s", extracted_json)
1448
-
1449
- raise ValueError("No valid JSON output with title and content_description from study agent")
1450
-
1451
- except Exception as e:
1452
- logger.error("Attempt %d/%d failed: %s\n%s", attempt + 1, max_retries + 1, str(e), traceback.format_exc())
1453
- if attempt == max_retries:
1454
- raise Exception(f"Failed to process file after {max_retries + 1} attempts: {str(e)}")
1455
- logger.info("Retrying study mode processing...")
1456
- await asyncio.sleep(1) # Brief delay before retry
1457
-
1458
  # Gradio interface
1459
  with gr.Blocks(
1460
- title="Lectūra AI",
1461
  css="""
1462
- h1 {text-align: center; color: #000 !important;}
1463
- .gradio-container-5-29-0 .prose :last-child {color: #fff !important; }
1464
  #lecture-container {font-family: 'Times New Roman', Times, serif;}
1465
  #slide-content {font-size: 48px; line-height: 1.2;}
1466
- #form-group {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; color: #000; background-color: white;}
1467
  #download {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px;}
1468
- #uploaded-file {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px;}
1469
  #slide-display {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; background-color: white;}
1470
- .gradio-container { background: #fff !important; box-shadow: 0 0 2rem rgba(255, 255, 255, 0.14);padding-top: 30px;}
1471
  button {transition: background-color 0.3s;}
1472
  button:hover {background-color: #e0e0e0;}
1473
- .upload-area {border: 2px dashed #ccc; border-radius: 20px; padding: 40px; text-align: center; cursor: pointer; height: 100%; min-height: 700px; display: flex; flex-direction: column; justify-content: center; align-items: center;}
1474
- .upload-area:hover {border-color: #16cd16;}
1475
- .upload-area.dragover {border-color: #16cd16; background-color: rgba(22, 205, 22, 0.1);}
1476
- .wrap.svelte-1kzox3m {justify-content: center;}
1477
- #mode-tabs {border-radius: 30px !important;}
1478
- #component-2 {border-radius: 30px; box-shadow: rgba(0, 0, 0, 0.14) 0px 0px 2rem !important; width: 290px;}
1479
- #component-0 {align-items: center;justify-content: center;}
1480
- #component-26 {box-shadow: rgba(0, 0, 0, 0.14) 0px 0px 2rem !important; border-radius: 30px; height: 970px !important; overflow: auto !important;}
1481
- #right-column {padding: 10px !important; height: 100% !important; display: flex !important; flex-direction: column !important; gap: 20px !important;}
1482
- #notes-section {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; background-color: white; padding: 20px; flex: 0 0 auto; display: flex; flex-direction: column; overflow: hidden;}
1483
- #chat-section {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; background-color: white; padding: 20px; flex: 1; display: flex; flex-direction: column; overflow: hidden; min-height: 760px;}
1484
- .note-button {width: 100%; border-radius: 15px; margin-bottom: 10px; padding: 10px; background-color: #f0f0f0; border: none; cursor: pointer; color: #000 !important}
1485
- .note-button:hover {background-color: #e0e0e0;}
1486
- .notes-list {flex: 1; overflow-y: auto; margin-top: 0px; min-height: 0;}
1487
- .chat-input-container {display: flex; gap: 10px; margin-top: auto; padding-top: 20px;}
1488
- .chat-input {flex-grow: 1; border-radius: 20px; padding: 10px 20px; border: 1px solid #ddd;background-color: rgb(240, 240, 240)}
1489
- .send-button {border-radius: 20px; padding: 10px 25px; background-color: #16cd16; color: white; border: none; cursor: pointer;}
1490
- .send-button:hover {background-color: #14b814;}
1491
- .back-button {border-radius: 50%; width: 40px; height: 40px; background-color: #f0f0f0; border: none; cursor: pointer; display: flex; align-items: center; justify-content: center;}
1492
- .back-button:hover {background-color: #e0e0e0;}
1493
- .note-editor {display: none; width: 100%; height: 100%; min-height: 0;}
1494
- .note-editor.active {display: flex; flex-direction: column;}
1495
- .notes-view {display: flex; flex-direction: column; height: 100%; min-height: 0;}
1496
- .notes-view.hidden {display: none;}
1497
- .chat-messages {flex: 1; overflow-y: auto; margin-bottom: 20px; min-height: 0;}
1498
- #study-guide-btn {margin-bottom: 0px !important}
1499
- #component-26 {padding: 20px}
1500
- .gradio-container-5-29-0 .prose :last-child {color: black !important;}
1501
- #add-note-btn, #study-guide-btn, #quiz-btn, #send-btn{border-radius: 30px !important;}
1502
- #chatbot {border-radius: 20px !important;}
1503
- #chat-input-row {align-items: center !important;}
1504
- .gradio-container { background-color: white !important; color: black !important;}
1505
- main {max-width: fit-content !important}
1506
- #component-36 {height: 460px !important}
1507
  """,
1508
  js=js_code,
1509
  head='<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">'
1510
  ) as demo:
1511
  gr.Markdown("""
1512
- # <center>Lectūra: Your AI Genie for Self-taught Mastery.</center>""")
1513
-
1514
- # Add mode tabs
1515
  with gr.Row():
1516
- with gr.Column(scale=1):
1517
- with gr.Group(elem_id="mode-tabs"):
1518
- mode_tabs = gr.Radio(
1519
- choices=["Learn Mode", "Study Mode"],
1520
- value="Learn Mode",
1521
- label="Mode",
1522
- elem_id="mode-tabs",
1523
- show_label=False
1524
- )
1525
-
1526
- with gr.Row():
1527
- # Left column (existing form)
1528
  with gr.Column(scale=1):
1529
  with gr.Group(elem_id="form-group"):
1530
  title = gr.Textbox(label="Lecture Title", placeholder="e.g. Introduction to AI")
@@ -1549,431 +1279,17 @@ with gr.Blocks(
1549
  api_key = gr.Textbox(label="Model Provider API Key", type="password", placeholder="Not required for Ollama or Azure AI Foundry (use GITHUB_TOKEN env var)")
1550
  serpapi_key = gr.Textbox(label="SerpApi Key (For Research Agent)", type="password", placeholder="Enter your SerpApi key (optional)")
1551
  num_slides = gr.Slider(1, 20, step=1, label="Number of Lecture Slides (will add intro and closing slides)", value=3)
1552
- speaker_select = gr.Dropdown(
1553
- choices=["feynman.mp3", "einstein.mp3", "samantha.mp3", "socrates.mp3", "professor_lectura_male.mp3"],
1554
- value="professor_lectura_male.mp3",
1555
- label="Select Instructor",
1556
- elem_id="speaker-select"
1557
- )
1558
- speaker_audio = gr.Audio(value="professor_lectura_male.mp3", label="Speaker sample speech (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
1559
  generate_btn = gr.Button("Generate Lecture")
1560
-
1561
- # Middle column (existing slide display)
1562
  with gr.Column(scale=2):
1563
  default_slide_html = """
1564
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 30px; box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important;">
1565
- <h2 style="font-style: italic; color: #000 !important;">Waiting for lecture content...</h2>
1566
- <p style="margin-top: 10px; font-size: 16px;color: #000 !important">Please Generate lecture content via the form on the left first before lecture begins</p>
1567
- </div>
1568
- """
1569
-
1570
- # Study mode upload area
1571
- study_mode_html = """
1572
- <div class="upload-area" id="upload-area">
1573
- <h2 style="margin-top: 20px; color: #000;">Please upload lecture material by clicking the upload button below</h2>
1574
- <p style="color: #666;">(only supports .pdf, .txt and .md)</p>
1575
  </div>
1576
  """
1577
  slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html, elem_id="slide-display")
1578
- uploaded_file = gr.File(label="Upload Lecture Material", visible=False, elem_id="uploaded-file")
1579
  file_output = gr.File(label="Download Lecture Materials", elem_id="download")
1580
-
1581
- # --- RIGHT COLUMN SPLIT: NOTES (TOP) AND CHAT (BOTTOM) ---
1582
- with gr.Column(scale=1, elem_id="right-column"): # Add elem_id for CSS targeting
1583
- # State for notes and lecture context
1584
- notes_state = gr.State([]) # List of notes: [{"title": ..., "content": ...}]
1585
- lecture_context_state = gr.State({}) # Dict with latest lecture slides/scripts
1586
- chat_history_state = gr.State([]) # List of {user, assistant}
1587
-
1588
- with gr.Row():
1589
- with gr.Column(scale=1, elem_id="notes-section"): # NOTES SECTION (TOP)
1590
- with gr.Row():
1591
- add_note_btn = gr.Button("+ Add note", elem_id="add-note-btn")
1592
- study_guide_btn = gr.Button("Study Guide", elem_id="study-guide-btn")
1593
- quiz_btn = gr.Button("Quiz Yourself", elem_id="quiz-btn")
1594
- note_response = gr.Textbox(label="Response", visible=True, value="Your notes, study guides, and quizzes will appear here...")
1595
- notes_list = gr.Dataframe(headers=["Title"], interactive=False, label="Your Notes", elem_id="notes-list")
1596
- with gr.Column(visible=False) as note_editor:
1597
- note_title = gr.Textbox(label="Note Title", elem_id="note-title")
1598
- note_content = gr.Textbox(label="Note Content", lines=10, elem_id="note-content")
1599
- with gr.Row():
1600
- save_note_btn = gr.Button("Save Note", elem_id="save-note-btn")
1601
- back_btn = gr.Button("Back", elem_id="back-btn")
1602
-
1603
- with gr.Column(scale=1, elem_id="chat-section"): # CHAT SECTION (BOTTOM)
1604
- with gr.Column():
1605
- chatbot = gr.Chatbot(label="Chat", elem_id="chatbot", height=220, show_copy_button=True, type="messages")
1606
- with gr.Row(elem_id="chat-input-row"):
1607
- chat_input = gr.Textbox(show_label=False, placeholder="Type your message...", lines=1, elem_id="chat-input", scale=10)
1608
- send_btn = gr.Button("Send", elem_id="send-btn", scale=1)
1609
-
1610
- # --- UI LOGIC FOR SHOWING/HIDING RESPONSE COMPONENTS ---
1611
def show_only(component):
    """Return visibility updates for the note, study and quiz panels.

    The result is a 3-tuple of ``gr.update`` objects, in the order
    ("note", "study", "quiz"). The entry whose key equals ``component`` is
    visible; the others are hidden.
    """
    panel_keys = ("note", "study", "quiz")
    return tuple(gr.update(visible=(component == key)) for key in panel_keys)
1617
-
1618
async def add_note_fn(notes, lecture_context, api_service, api_key, title_val, desc_val, style_val, audience_val):
    """Ask the note agent for a fresh note draft based on the lecture context.

    The lecture context falls back to the form fields when no slides/scripts
    are available. Returns a 3-tuple: an update carrying "title\\ncontent"
    (stripped) for the response box, then the draft title and draft content.
    ``notes`` is accepted because it is wired as an input but is not read.
    """
    context = get_fallback_lecture_context(lecture_context, title_val, desc_val, style_val, audience_val)
    # Empty title/content asks the agent to suggest a note from scratch.
    draft = await run_note_agent(api_service, api_key, context, "", "")
    draft_title = draft.get("title", "")
    draft_body = draft.get("content", "")
    preview = "\n".join((draft_title, draft_body)).strip()
    return gr.update(value=preview), draft_title, draft_body
1627
- add_note_btn.click(
1628
- fn=add_note_fn,
1629
- inputs=[notes_state, lecture_context_state, api_service, api_key, title, lecture_content_description, lecture_style, lecture_type],
1630
- outputs=[note_response, note_title, note_content]
1631
- )
1632
-
1633
- # Study Guide button: generate study guide and show response
1634
async def study_guide_btn_fn(notes, lecture_context, api_service, api_key, title_val, desc_val, style_val, audience_val):
    """Generate a study guide and return it as an update for the response box.

    Uses the lecture context, or the form fields when no slides/scripts are
    available. ``notes`` is wired as an input but is not read.
    """
    guide_text = await run_study_agent(
        api_service,
        api_key,
        get_fallback_lecture_context(lecture_context, title_val, desc_val, style_val, audience_val),
    )
    return gr.update(value=guide_text)
1638
- study_guide_btn.click(
1639
- fn=study_guide_btn_fn,
1640
- inputs=[notes_state, lecture_context_state, api_service, api_key, title, lecture_content_description, lecture_style, lecture_type],
1641
- outputs=[note_response]
1642
- )
1643
-
1644
- # Quiz button: generate quiz and show response
1645
async def quiz_btn_fn(notes, lecture_context, api_service, api_key, title_val, desc_val, style_val, audience_val):
    """Generate a quiz and return it as an update for the response box.

    Uses the lecture context, or the form fields when no slides/scripts are
    available. ``notes`` is wired as an input but is not read.
    """
    quiz_text = await run_quiz_agent(
        api_service,
        api_key,
        get_fallback_lecture_context(lecture_context, title_val, desc_val, style_val, audience_val),
    )
    return gr.update(value=quiz_text)
1649
- quiz_btn.click(
1650
- fn=quiz_btn_fn,
1651
- inputs=[notes_state, lecture_context_state, api_service, api_key, title, lecture_content_description, lecture_style, lecture_type],
1652
- outputs=[note_response]
1653
- )
1654
-
1655
- # Back button: clear response
1656
- back_btn.click(
1657
- fn=lambda: gr.update(value="Click any button above to generate content..."),
1658
- inputs=[],
1659
- outputs=[note_response]
1660
- )
1661
-
1662
- # Save Note button: add note to state and update list, clear response
1663
async def save_note(note_title_val, note_content_val, notes, lecture_context, api_service, api_key, note_type=None):
    """Polish a note with the note agent, store it, and persist it to disk.

    Args:
        note_title_val: Title typed by the user (may be empty).
        note_content_val: Content typed by the user (may be empty).
        notes: Current list of note dicts; it is copied, not mutated.
        lecture_context: Latest lecture context; the note title/content are
            used as fallback context when it has no slides or scripts.
        api_service: Model provider name passed to the note agent.
        api_key: Provider API key passed to the note agent.
        note_type: Optional label prefixed to the title.

    Returns:
        A 3-tuple of (rows for the notes table, new notes list, update that
        resets the response box to its placeholder text).
    """
    context = get_fallback_lecture_context(lecture_context, note_title_val, note_content_val, "", "")
    note = await run_note_agent(api_service, api_key, context, note_title_val, note_content_val)

    # The agent's JSON is not guaranteed to carry both keys (or string
    # values), so normalise them before they are used as a file name/body.
    raw_title = str(note.get("title") or note_title_val or "Untitled note")
    note["content"] = str(note.get("content") or note_content_val or "")
    if note_type:
        raw_title = note_type_prefix(note_type, raw_title)

    # The title doubles as the file name: strip path separators, reserved
    # and control characters so an agent- or user-supplied title cannot
    # escape OUTPUT_DIR or produce an invalid name. The sanitised title is
    # stored on the note so later lookups by title resolve to the same file.
    safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", raw_title).strip(" .") or "Untitled note"
    note["title"] = safe_title

    new_notes = copy.deepcopy(notes)
    new_notes.append(note)

    note_file = os.path.join(OUTPUT_DIR, f"{safe_title}.txt")
    with open(note_file, "w", encoding="utf-8") as f:
        f.write(note["content"])

    return (
        update_notes_list(new_notes),
        new_notes,
        gr.update(value="Click any button above to generate content..."),
    )
1679
- save_note_btn.click(
1680
- fn=save_note,
1681
- inputs=[note_title, note_content, notes_state, lecture_context_state, api_service, api_key],
1682
- outputs=[notes_list, notes_state, note_response]
1683
- )
1684
-
1685
- # --- CHAT AGENT LOGIC ---
1686
async def chat_fn(user_message, chat_history, lecture_context, api_service, api_key, title_val, desc_val):
    """Send one user message to the chat agent and update the chat UI.

    Args:
        user_message: Text from the chat input box.
        chat_history: List of ``{"role", "content"}`` message dicts.
        lecture_context: Latest lecture context handed to the agent.
        api_service: Model provider name.
        api_key: Provider API key.
        title_val: Current lecture title (wired as an input, not read).
        desc_val: Current lecture description (wired as an input, not read).

    Returns:
        A 5-tuple: (chatbot messages, cleared chat input, new history state,
        title update, description update). The two form updates are no-ops
        unless the agent returned a form-update dict with those fields.
    """
    chat_history = chat_history or []
    if not user_message or not user_message.strip():
        return chat_history, "", chat_history, gr.update(), gr.update()

    form_update, response = await run_chat_agent(api_service, api_key, lecture_context, chat_history, user_message)

    new_history = [*chat_history, {"role": "user", "content": user_message}]
    if response:
        new_history.append({"role": "assistant", "content": response})

    title_update = gr.update()
    desc_update = gr.update()
    if form_update:
        suggested_title = form_update.get("title")
        suggested_desc = form_update.get("content_description")
        summary_lines = []
        if suggested_title:
            summary_lines.append(f"Lecture Title: {suggested_title}")
            title_update = gr.update(value=suggested_title)
        if suggested_desc:
            summary_lines.append(f"Lecture Content Description: {suggested_desc}")
            desc_update = gr.update(value=suggested_desc)
        # The agent may return a JSON object that is not a form update; show
        # it verbatim rather than appending an empty assistant message.
        summary = "\n".join(summary_lines) or json.dumps(form_update, ensure_ascii=False)
        new_history.append({"role": "assistant", "content": summary})

    return new_history, "", new_history, title_update, desc_update
1712
- send_btn.click(
1713
- fn=chat_fn,
1714
- inputs=[chat_input, chat_history_state, lecture_context_state, api_service, api_key, title, lecture_content_description],
1715
- outputs=[chatbot, chat_input, chat_history_state, title, lecture_content_description]
1716
- )
1717
-
1718
- js_code = js_code + """
1719
- // Add file upload handling
1720
- function initializeFileUpload() {
1721
- const uploadArea = document.getElementById('upload-area');
1722
- if (!uploadArea) return;
1723
-
1724
- // Create hidden file input
1725
- const fileInput = document.createElement('input');
1726
- fileInput.type = 'file';
1727
- fileInput.accept = '.pdf,.txt,.md';
1728
- fileInput.style.display = 'none';
1729
- uploadArea.appendChild(fileInput);
1730
-
1731
- // Handle click on the entire upload area
1732
- uploadArea.addEventListener('click', (e) => {
1733
- if (e.target !== fileInput) {
1734
- fileInput.click();
1735
- }
1736
- });
1737
-
1738
- fileInput.addEventListener('change', (e) => {
1739
- const file = e.target.files[0];
1740
- if (file) {
1741
- const dataTransfer = new DataTransfer();
1742
- dataTransfer.items.add(file);
1743
- const gradioFileInput = document.querySelector('input[type="file"]');
1744
- if (gradioFileInput) {
1745
- gradioFileInput.files = dataTransfer.files;
1746
- const event = new Event('change', { bubbles: true });
1747
- gradioFileInput.dispatchEvent(event);
1748
- }
1749
- }
1750
- });
1751
-
1752
- // Handle drag and drop
1753
- ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
1754
- uploadArea.addEventListener(eventName, preventDefaults, false);
1755
- });
1756
-
1757
- function preventDefaults(e) {
1758
- e.preventDefault();
1759
- e.stopPropagation();
1760
- }
1761
-
1762
- ['dragenter', 'dragover'].forEach(eventName => {
1763
- uploadArea.addEventListener(eventName, highlight, false);
1764
- });
1765
-
1766
- ['dragleave', 'drop'].forEach(eventName => {
1767
- uploadArea.addEventListener(eventName, unhighlight, false);
1768
- });
1769
-
1770
- function highlight(e) {
1771
- uploadArea.classList.add('dragover');
1772
- }
1773
-
1774
- function unhighlight(e) {
1775
- uploadArea.classList.remove('dragover');
1776
- }
1777
-
1778
- uploadArea.addEventListener('drop', handleDrop, false);
1779
-
1780
- function handleDrop(e) {
1781
- const dt = e.dataTransfer;
1782
- const file = dt.files[0];
1783
- if (file) {
1784
- const dataTransfer = new DataTransfer();
1785
- dataTransfer.items.add(file);
1786
- const gradioFileInput = document.querySelector('input[type="file"]');
1787
- if (gradioFileInput) {
1788
- gradioFileInput.files = dataTransfer.files;
1789
- const event = new Event('change', { bubbles: true });
1790
- gradioFileInput.dispatchEvent(event);
1791
- }
1792
- }
1793
- }
1794
- }
1795
-
1796
- // Initialize clear button functionality
1797
- function initializeClearButton() {
1798
- const clearButton = document.getElementById('clear-btn');
1799
- if (clearButton) {
1800
- clearButton.addEventListener('click', () => {
1801
- const modeTabs = document.querySelector('.mode-tabs input[type="radio"]:checked');
1802
- const isStudyMode = modeTabs && modeTabs.value === 'Study Mode';
1803
-
1804
- // Reset all audio elements
1805
- const audioElements = document.querySelectorAll('audio');
1806
- audioElements.forEach(audio => {
1807
- audio.pause();
1808
- audio.currentTime = 0;
1809
- audio.style.border = 'none';
1810
- });
1811
-
1812
- // Reset play button
1813
- const playBtn = document.getElementById('play-btn');
1814
- if (playBtn) {
1815
- const playIcon = playBtn.querySelector('i');
1816
- if (playIcon) {
1817
- playIcon.className = 'fas fa-play';
1818
- }
1819
- }
1820
-
1821
- const slideContent = document.getElementById('slide-content');
1822
- if (slideContent) {
1823
- if (isStudyMode) {
1824
- slideContent.innerHTML = `
1825
- <div class="upload-area" id="upload-area">
1826
- <h2 style="margin-top: 20px; color: #000;">Please upload lecture material by clicking the upload button below</h2>
1827
- <p style="color: #666;">(only supports .pdf, .txt and .md)</p>
1828
- </div>
1829
- `;
1830
- initializeFileUpload();
1831
- } else {
1832
- slideContent.innerHTML = `
1833
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 30px; box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important;">
1834
- <h2 style="font-style: italic; color: #000 !important;">Waiting for lecture content...</h2>
1835
- <p style="margin-top: 10px; font-size: 16px;color: #000">Please Generate lecture content via the form on the left first before lecture begins</p>
1836
- </div>
1837
- `;
1838
- }
1839
- }
1840
- });
1841
- }
1842
- }
1843
-
1844
- // Initialize speaker selection
1845
- function initializeSpeakerSelect() {
1846
- const speakerSelect = document.getElementById('speaker-select');
1847
- const speakerAudio = document.querySelector('#speaker-audio input[type="file"]');
1848
-
1849
- if (speakerSelect && speakerAudio) {
1850
- speakerSelect.addEventListener('change', (e) => {
1851
- const selectedSpeaker = e.target.value;
1852
- // Create a new File object from the selected speaker
1853
- fetch(selectedSpeaker)
1854
- .then(response => response.blob())
1855
- .then(blob => {
1856
- const file = new File([blob], selectedSpeaker, { type: 'audio/mpeg' });
1857
- const dataTransfer = new DataTransfer();
1858
- dataTransfer.items.add(file);
1859
- speakerAudio.files = dataTransfer.files;
1860
- const event = new Event('change', { bubbles: true });
1861
- speakerAudio.dispatchEvent(event);
1862
- });
1863
- });
1864
- }
1865
- }
1866
-
1867
- // Initialize file upload when study mode is active
1868
- function checkAndInitializeUpload() {
1869
- const uploadArea = document.getElementById('upload-area');
1870
- if (uploadArea) {
1871
- console.log('Initializing file upload...');
1872
- initializeFileUpload();
1873
- }
1874
- initializeClearButton();
1875
- initializeSpeakerSelect();
1876
- }
1877
-
1878
- // Check immediately and also set up an observer
1879
- checkAndInitializeUpload();
1880
-
1881
- const modeObserver = new MutationObserver((mutations) => {
1882
- mutations.forEach((mutation) => {
1883
- if (mutation.addedNodes.length) {
1884
- checkAndInitializeUpload();
1885
- }
1886
- });
1887
- });
1888
- modeObserver.observe(document.body, { childList: true, subtree: true });
1889
- """
1890
-
1891
- # Handle mode switching
1892
def switch_mode(mode):
    """Return the slide-area HTML and component visibility for ``mode``.

    "Learn Mode" shows the default waiting panel and hides the upload
    component; any other mode shows the study-mode upload panel and the
    upload component. The generate button stays visible in both cases.
    """
    learn_mode = mode == "Learn Mode"
    slide_html = default_slide_html if learn_mode else study_mode_html
    return slide_html, gr.update(visible=True), gr.update(visible=not learn_mode)
1897
-
1898
- mode_tabs.change(
1899
- fn=switch_mode,
1900
- inputs=[mode_tabs],
1901
- outputs=[slide_display, generate_btn, uploaded_file]
1902
- )
1903
-
1904
- # Handle file upload in study mode
1905
def _upload_error_panel(heading, detail):
    """Build the error card shown in the slide area (arguments must be HTML-safe)."""
    return f"""
    <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
        <h2 style="color: #d9534f;">{heading}</h2>
        <p style="margin-top: 20px;">{detail}</p>
    </div>
    """


async def handle_file_upload(file, api_service, api_key):
    """Handle file upload in study mode and validate API key.

    Async generator. Each yield is a 3-tuple for (slide display HTML,
    lecture title, lecture content description). The title and description
    are only overwritten on success; progress and error steps yield no-op
    updates so a failed upload does not wipe what the user already typed.

    Args:
        file: Uploaded file value from the upload component; falsy when the
            upload was cleared.
        api_service: Selected model provider name.
        api_key: Provider API key (not needed for "Azure AI Foundry", which
            uses the GITHUB_TOKEN environment variable instead).
    """
    import html  # local import: only needed to escape exception text

    unchanged = gr.update()

    if not file:
        yield default_slide_html, unchanged, unchanged
        return

    # Validate API key or GITHUB_TOKEN for Azure AI Foundry.
    # NOTE(review): the API-key field's placeholder says a key is also not
    # required for Ollama, but this check still rejects it - confirm intent.
    if not api_key and api_service != "Azure AI Foundry":
        logger.warning("API key is empty, terminating file upload")
        yield _upload_error_panel(
            "Please input api key first",
            "An API key is required to process uploaded files in Study mode. Please provide a valid API key and try again.",
        ), unchanged, unchanged
        return
    if api_service == "Azure AI Foundry" and not os.environ.get("GITHUB_TOKEN"):
        logger.warning("GITHUB_TOKEN is missing for Azure AI Foundry, terminating file upload")
        yield _upload_error_panel(
            "GITHUB_TOKEN not set",
            "Azure AI Foundry requires a GITHUB_TOKEN environment variable. Please set it and try again.",
        ), unchanged, unchanged
        return

    try:
        # Staged progress messages; the short sleeps let the UI render each step.
        for label, percent in (
            ("Uploading Lecture Material...", 25),
            ("Processing file...", 50),
            ("Researching lecture material...", 75),
        ):
            yield html_with_progress(label, percent), unchanged, unchanged
            await asyncio.sleep(0.1)

        result = await study_mode_process(file, api_service, api_key)

        success_html = """
        <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 30px; box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important;">
            <h2 style="font-style: italic; color: #000 !important;">Research on study material completed, you can now generate lecture</h2>
            <p style="margin-top: 10px; font-size: 16px;color: #000">The form has been updated with the extracted information. Click Generate Lecture to proceed.</p>
        </div>
        """

        # Update only title and description
        yield success_html, result["title"], result["content_description"]
    except Exception as e:
        logger.exception(f"Error processing file: {str(e)}")
        # Exception text can contain file names or model output; escape it
        # before embedding it in the HTML component.
        yield _upload_error_panel("Error processing file", html.escape(str(e))), unchanged, unchanged
1971
-
1972
- uploaded_file.change(
1973
- fn=handle_file_upload,
1974
- inputs=[uploaded_file, api_service, api_key],
1975
- outputs=[slide_display, title, lecture_content_description]
1976
- )
1977
 
1978
  speaker_audio.change(
1979
  fn=update_audio_preview,
@@ -1987,239 +1303,5 @@ with gr.Blocks(
1987
  outputs=[slide_display, file_output]
1988
  )
1989
 
1990
- # Handle speaker selection
1991
def update_speaker_audio(speaker):
    """Log the newly selected speaker and return it unchanged.

    The returned value is wired to the speaker audio component.
    """
    selected_speaker = speaker
    logger.info(f"Speaker selection changed to: {selected_speaker}")
    return selected_speaker
1994
-
1995
- speaker_select.change(
1996
- fn=update_speaker_audio,
1997
- inputs=[speaker_select],
1998
- outputs=[speaker_audio]
1999
- )
2000
-
2001
- js_code = js_code + """
2002
- // Add note editor functionality
2003
- function initializeNoteEditor() {
2004
- const addNoteBtn = document.getElementById('add-note-btn');
2005
- const backBtn = document.getElementById('back-btn');
2006
- const notesView = document.getElementById('notes-view');
2007
- const noteEditor = document.getElementById('note-editor');
2008
-
2009
- if (addNoteBtn && backBtn && notesView && noteEditor) {
2010
- addNoteBtn.addEventListener('click', () => {
2011
- notesView.style.display = 'none';
2012
- noteEditor.style.display = 'block';
2013
- });
2014
-
2015
- backBtn.addEventListener('click', () => {
2016
- noteEditor.style.display = 'none';
2017
- notesView.style.display = 'block';
2018
- });
2019
- }
2020
- }
2021
-
2022
- // Initialize all components
2023
- function initializeComponents() {
2024
- initializeFileUpload();
2025
- initializeClearButton();
2026
- initializeSpeakerSelect();
2027
- initializeNoteEditor();
2028
- }
2029
-
2030
- // Check immediately and also set up an observer
2031
- initializeComponents();
2032
-
2033
- const observer = new MutationObserver((mutations) => {
2034
- mutations.forEach((mutation) => {
2035
- if (mutation.addedNodes.length) {
2036
- initializeComponents();
2037
- }
2038
- });
2039
- });
2040
- observer.observe(document.body, { childList: true, subtree: true });
2041
- """
2042
-
2043
async def run_note_agent(api_service, api_key, lecture_context, note_title, note_content):
    """Run the note agent and return its note as a dict.

    The agent is given the JSON-serialised lecture context plus the supplied
    title/content. The newest message from ``note_agent`` that yields a
    non-empty JSON dict wins; failing that, the newest such message from any
    source. If nothing parses, ``{"title": note_title, "content":
    note_content}`` is returned.
    """
    note_agent = AssistantAgent(
        name="note_agent",
        model_client=get_model_client(api_service, api_key),
        system_message=(
            "You are a Note Agent. Given the current lecture slides and scripts, help the user draft a note. "
            "If a title or content is provided, improve or complete the note. If not, suggest a new note based on the lecture. "
            "Always use the lecture context. Output a JSON object: {\"title\": ..., \"content\": ...}."
        ),
    )
    context_json = json.dumps(lecture_context)
    task = f"Lecture Context: {context_json}\nNote Title: {note_title}\nNote Content: {note_content}"
    result = await Console(note_agent.run_stream(task=task))

    def _text_messages(agent_only):
        # Newest first; optionally restricted to the note agent's own replies.
        for message in reversed(result.messages):
            if agent_only and getattr(message, "source", None) != "note_agent":
                continue
            if isinstance(getattr(message, "content", None), str):
                yield message

    # Pass 1: only the agent's replies. Pass 2: any message in the run.
    for agent_only in (True, False):
        for message in _text_messages(agent_only):
            try:
                parsed = extract_json_from_message(message)
            except Exception:
                continue
            if parsed and isinstance(parsed, dict):
                return parsed

    return {"title": note_title, "content": note_content}
2077
-
2078
async def run_study_agent(api_service, api_key, lecture_context):
    """Run the study-guide agent and return its reply as stripped text.

    Prefers the newest string message from ``study_agent``; otherwise the
    newest string message from any source; otherwise the literal
    "No study guide generated.".
    """
    study_agent = AssistantAgent(
        name="study_agent",
        model_client=get_model_client(api_service, api_key),
        system_message=(
            "You are a Study Guide Agent. Given the current lecture slides and scripts, generate a concise study guide (max 200 words) summarizing the key points and actionable steps for the student. Output plain text only."
        ),
    )
    context_json = json.dumps(lecture_context)
    result = await Console(study_agent.run_stream(task=f"Lecture Context: {context_json}"))

    # Newest-first list of messages that carry plain-text content.
    text_replies = [
        message for message in reversed(result.messages)
        if isinstance(getattr(message, "content", None), str)
    ]
    agent_replies = [
        message for message in text_replies
        if getattr(message, "source", None) == "study_agent"
    ]
    candidates = agent_replies or text_replies
    if candidates:
        return candidates[0].content.strip()
    return "No study guide generated."
2099
-
2100
async def run_quiz_agent(api_service, api_key, lecture_context):
    """Run the quiz agent and return its reply as stripped text.

    Prefers the newest string message from ``quiz_agent``; otherwise the
    newest string message from any source; otherwise the literal
    "No quiz generated.".
    """
    quiz_agent = AssistantAgent(
        name="quiz_agent",
        model_client=get_model_client(api_service, api_key),
        system_message=(
            "You are a Quiz Agent. Given the current lecture slides and scripts, generate a short quiz (3-5 questions) to test understanding. Output plain text only."
        ),
    )
    context_json = json.dumps(lecture_context)
    result = await Console(quiz_agent.run_stream(task=f"Lecture Context: {context_json}"))

    # Newest-first list of messages that carry plain-text content.
    text_replies = [
        message for message in reversed(result.messages)
        if isinstance(getattr(message, "content", None), str)
    ]
    agent_replies = [
        message for message in text_replies
        if getattr(message, "source", None) == "quiz_agent"
    ]
    candidates = agent_replies or text_replies
    if candidates:
        return candidates[0].content.strip()
    return "No quiz generated."
2121
-
2122
async def run_chat_agent(api_service, api_key, lecture_context, chat_history, user_message):
    """Run the chat agent for one user turn.

    Returns a ``(form_update, reply)`` pair in which exactly one item is set:
    ``(dict, None)`` when the chosen message parses to a non-empty JSON dict,
    otherwise ``(None, text)``. The chosen message is the newest string
    message from ``chat_agent``, else the newest string message from any
    source; with no such message the result is ``(None, "No response.")``.
    """
    chat_agent = AssistantAgent(
        name="chat_agent",
        model_client=get_model_client(api_service, api_key),
        system_message=(
            "You are a helpful Chat Agent. Answer questions about the lecture, and if the user asks for a lecture title or content description, suggest appropriate values. "
            "If you want to update the form, output a JSON object: {\"title\": ..., \"content_description\": ...}. Otherwise, just reply as normal."
        ),
    )
    context_json = json.dumps(lecture_context)
    # Flatten prior turns into "User: ..." / "Assistant: ..." lines.
    transcript = "\n".join(
        f"{'User' if turn['role'] == 'user' else 'Assistant'}: {turn['content']}"
        for turn in chat_history
    )
    task = f"Lecture Context: {context_json}\nChat History: {transcript}\nUser: {user_message}"
    result = await Console(chat_agent.run_stream(task=task))

    # Newest-first list of messages that carry plain-text content.
    text_replies = [
        message for message in reversed(result.messages)
        if isinstance(getattr(message, "content", None), str)
    ]
    agent_replies = [
        message for message in text_replies
        if getattr(message, "source", None) == "chat_agent"
    ]
    candidates = agent_replies or text_replies
    if not candidates:
        return None, "No response."

    latest = candidates[0]
    extracted = extract_json_from_message(latest)
    if extracted and isinstance(extracted, dict):
        return extracted, None
    return None, latest.content.strip()
2151
-
2152
def update_notes_list(notes):
    """Convert notes list to DataFrame format for Gradio Dataframe (titles only).

    Args:
        notes: Iterable of note dicts, or ``None``.

    Returns:
        One single-cell row per note. A note without a usable "title" is
        listed as "Untitled" instead of raising ``KeyError``, because notes
        come from agent-generated JSON that may omit the key.
    """
    return [[note.get("title") or "Untitled"] for note in (notes or [])]
2155
-
2156
def show_note_editor_with_content(title, content):
    """Return updates that open the note editor pre-filled with a note.

    Output order: note_editor (shown), notes_list, study_guide_output and
    quiz_output (all hidden), then note_title and note_content values.
    """
    editor_shown = gr.update(visible=True)
    hidden_panels = [gr.update(visible=False) for _ in range(3)]
    return (
        editor_shown,
        *hidden_panels,
        gr.update(value=title),
        gr.update(value=content),
    )
2165
-
2166
def hide_note_editor():
    """Return four visibility updates: hidden, visible, hidden, hidden.

    NOTE(review): presumably targets (note_editor, notes_list,
    study_guide_output, quiz_output) like the sibling helpers - confirm
    against the event wiring.
    """
    visibility = (False, True, False, False)
    return tuple(gr.update(visible=flag) for flag in visibility)
2168
-
2169
def show_study_guide(guide):
    """Return four updates that display ``guide`` in the third output.

    Order: hidden, visible, visible with ``guide`` as its value, hidden.
    """
    first_hidden = gr.update(visible=False)
    second_shown = gr.update(visible=True)
    guide_shown = gr.update(value=guide, visible=True)
    last_hidden = gr.update(visible=False)
    return first_hidden, second_shown, guide_shown, last_hidden
2171
-
2172
def show_quiz(quiz):
    """Return four updates that display ``quiz`` in the fourth output.

    Order: hidden, visible, hidden, visible with ``quiz`` as its value.
    """
    first_hidden = gr.update(visible=False)
    second_shown = gr.update(visible=True)
    third_hidden = gr.update(visible=False)
    quiz_shown = gr.update(value=quiz, visible=True)
    return first_hidden, second_shown, third_hidden, quiz_shown
2174
-
2175
- # Helper to get fallback lecture context from form fields
2176
-
2177
def get_fallback_lecture_context(lecture_context, title_val, desc_val, style_val, audience_val):
    """Return a usable lecture context, falling back to the form fields.

    If ``lecture_context`` is non-empty and has slides or scripts, it is
    returned as-is (same object). Otherwise a new dict is built with empty
    ``slides``/``scripts`` and the form values, each replaced by a default
    when falsy.
    """
    has_material = bool(lecture_context) and bool(
        lecture_context.get("slides") or lecture_context.get("scripts")
    )
    if has_material:
        return lecture_context

    fallback = {"slides": [], "scripts": []}
    form_fields = (
        ("title", title_val, "Untitled Lecture"),
        ("description", desc_val, "No description provided."),
        ("style", style_val, "Feynman - Simplifies complex ideas with enthusiasm"),
        ("audience", audience_val, "University"),
    )
    for key, value, default in form_fields:
        fallback[key] = value or default
    return fallback
2189
-
2190
- # Show note content when a note title is clicked
2191
-
2192
def show_note_content(evt: gr.SelectData, notes):
    """Show the saved text of the note whose row was clicked in the notes table.

    Args:
        evt: Select event data. It must be annotated ``gr.SelectData`` for
            Gradio to inject it; with a plain ``dict`` annotation the
            handler is called without it. A dict with an ``"index"`` key is
            still accepted for backward compatibility.
        notes: Current list of note dicts.

    Returns:
        An update for the response box: the note file's text, or the
        in-memory note content when the file is missing, or the placeholder
        text when the selection does not map to a note.
    """
    placeholder = gr.update(value="Click any button above to generate content...")

    selected = evt.get("index", 0) if isinstance(evt, dict) else getattr(evt, "index", None)
    # Dataframe selections report [row, column]; only the row matters here.
    if isinstance(selected, (list, tuple)):
        selected = selected[0] if selected else None
    if not isinstance(selected, int) or not 0 <= selected < len(notes or []):
        return placeholder

    note = notes[selected]
    note_file = os.path.join(OUTPUT_DIR, f"{note.get('title', '')}.txt")
    if os.path.exists(note_file):
        with open(note_file, "r", encoding="utf-8") as f:
            return gr.update(value=f.read())
    if note.get("content"):
        return gr.update(value=note["content"])
    return placeholder
2203
- notes_list.select(
2204
- fn=show_note_content,
2205
- inputs=[notes_state],
2206
- outputs=note_response
2207
- )
2208
-
2209
- # --- NOTES LOGIC ---
2210
def note_type_prefix(note_type, title):
    """Return ``title`` prefixed with ``"<note_type> - "``.

    The title is returned unchanged when ``note_type`` is falsy or the title
    already starts with it.
    """
    already_tagged = not note_type or title.startswith(note_type)
    return title if already_tagged else f"{note_type} - {title}"
2214
-
2215
- custom_css = """
2216
- #right-column {height: 100% !important; display: flex !important; flex-direction: column !important; gap: 20px !important;}
2217
- #notes-section, #chat-section {flex: 1 1 0; min-height: 0; max-height: 50vh; overflow-y: auto;}
2218
- #chat-section {display: flex; flex-direction: column; position: relative;}
2219
- #chatbot {flex: 1 1 auto; min-height: 0; max-height: calc(50vh - 60px); overflow-y: auto;}
2220
- #chat-input-row {position: sticky; bottom: 0; background: white; z-index: 2; padding-top: 8px;}
2221
- """
2222
- demo.css += custom_css
2223
-
2224
  if __name__ == "__main__":
2225
  demo.launch(allowed_paths=[OUTPUT_DIR])
 
1
+ # Professor AI Feynman: A Multi-Agent Tool for Learning Anything the Feynman way
2
+ # Jaward Sesay - Microsoft AI Agent Hackathon Submission April 2025
 
3
  import os
4
  import json
5
  import re
 
28
  from pydub import AudioSegment
29
  from TTS.api import TTS
30
  import markdown
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  # Set up logging
33
  logging.basicConfig(
 
42
 
43
  # Set up environment
44
  OUTPUT_DIR = os.path.join(os.getcwd(), "outputs")
 
45
  os.makedirs(OUTPUT_DIR, exist_ok=True)
 
46
  logger.info(f"Using output directory: {OUTPUT_DIR}")
 
47
  os.environ["COQUI_TOS_AGREED"] = "1"
48
 
49
  # Initialize TTS model
 
104
  return "<div>Error rendering content</div>"
105
 
106
  # Slide tool for generating HTML slides used by slide_agent
107
+ def create_slides(slides: list[dict], title: str, output_dir: str = OUTPUT_DIR) -> list[str]:
108
  try:
109
  html_files = []
110
  template_file = os.path.join(os.getcwd(), "slide_template.html")
 
122
  slide_html = slide_html.replace("section title", f"{slide['title']}")
123
  slide_html = slide_html.replace("Lecture title", title)
124
  slide_html = slide_html.replace("<!--CONTENT-->", html_content)
125
+ slide_html = slide_html.replace("speaker name", "Prof. AI Feynman")
126
  slide_html = slide_html.replace("date", date)
127
 
128
  html_file = os.path.join(output_dir, f"slide_{slide_number}.html")
 
152
  <div style="width: 70%; background-color: lightgrey; border-radius: 80px; overflow: hidden; margin-bottom: 20px;">
153
  <div style="width: {progress}%; height: 15px; background-color: #4CAF50; border-radius: 80px;"></div>
154
  </div>
155
+ <h2 style="font-style: italic; color: #555;">{label}</h2>
156
  </div>
157
  """
158
 
 
205
  async def validate_and_convert_speaker_audio(speaker_audio):
206
  if not speaker_audio or not os.path.exists(speaker_audio):
207
  logger.warning("Speaker audio file does not exist: %s. Using default voice.", speaker_audio)
208
+ default_voice = os.path.join(os.path.dirname(__file__), "feynman.mp3")
209
  if os.path.exists(default_voice):
210
  speaker_audio = default_voice
211
  else:
 
390
  # Async generate lecture materials and audio
391
  async def on_generate(api_service, api_key, serpapi_key, title, lecture_content_description, lecture_type, lecture_style, speaker_audio, num_slides):
392
  model_client = get_model_client(api_service, api_key)
 
 
 
 
 
 
 
393
 
394
  if os.path.exists(OUTPUT_DIR):
395
  try:
 
425
  system_message=f"""
426
  You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({content_slides}), generate exactly {content_slides} content slides, plus an Introduction slide as the first slide and a Closing slide as the last slide, making a total of {total_slides} slides.
427
 
428
+ - The Introduction slide (first slide) should have the title "{title}" and content containing only the lecture title, speaker name (Prof. AI Feynman), and date {date}, centered, in plain text.
429
  - The Closing slide (last slide) should have the title "Closing" and content containing only "The End\nThank you", centered, in plain text.
430
  - The remaining {content_slides} slides should be content slides based on the lecture description, audience type, and lecture style ({lecture_style}), with meaningful titles and content in valid Markdown format. Adapt the content to the lecture style to suit diverse learners:
431
  - Feynman: Explains complex ideas with simplicity, clarity, and enthusiasm, emulating Richard Feynman's teaching style.
 
439
  Example output for 1 content slide (total 3 slides):
440
  ```json
441
  [
442
+ {{"title": "Introduction to AI Basics", "content": "AI Basics\nProf. AI Feynman\nMay 2nd, 2025"}},
443
  {{"title": "What is AI?", "content": "# What is AI?\n- Definition: Systems that mimic human intelligence\n- Key areas: ML, NLP, Robotics"}},
444
  {{"title": "Closing", "content": "The End\nThank you"}}
445
  ]
 
451
  script_agent = AssistantAgent(
452
  name="script_agent",
453
  model_client=model_client,
454
+ handoffs=["feynman_agent"],
455
  system_message=f"""
456
+ You are a Script Agent modeled after Richard Feynman. Access the JSON array of {total_slides} slides from the conversation history, which includes an Introduction slide, {content_slides} content slides, and a Closing slide. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a clear, academically inclined tone, with humor as Professor Feynman would deliver it. Ensure the lecture is engaging, covers the fundamental requirements of the topic, and aligns with the lecture style ({lecture_style}) to suit diverse learners:
457
+ - Feynman: Explains complex ideas with simplicity, clarity, and enthusiasm, emulating Richard Feynman's teaching style.
458
+ - Socratic: Poses thought-provoking questions to guide learners to insights without requiring direct interaction.
459
+ - Narrative: Use storytelling or analogies to explain concepts.
460
+ - Analytical: Focus on data, equations, or logical breakdowns.
461
+ - Humorous: Infuses wit and light-hearted anecdotes to make content engaging and memorable.
462
+ - Reflective: Encourages introspection with a calm, contemplative tone to deepen understanding.
463
 
464
+ Output ONLY a JSON array wrapped in ```json ... ``` with exactly {total_slides} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
465
 
466
  Example for 3 slides (1 content slide):
467
  ```json
468
  [
469
+ "Welcome to the lecture on AI Basics. I am Professor AI Feynman, and today we will explore the fundamentals of artificial intelligence.",
470
  "Let us begin by defining artificial intelligence: it refers to systems that mimic human intelligence, spanning key areas such as machine learning, natural language processing, and robotics.",
471
  "That concludes our lecture on AI Basics. Thank you for your attention, and I hope you found this session insightful."
472
  ]
 
474
  output_content_type=None,
475
  reflect_on_tool_use=False
476
  )
477
+ feynman_agent = AssistantAgent(
478
+ name="feynman_agent",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
479
  model_client=model_client,
480
  handoffs=[],
481
+ system_message=f"""
482
+ You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {total_slides} slides and {total_slides} scripts are received, including the Introduction and Closing slides. Verify that HTML slide files exist in the outputs directory and align with the lecture style ({lecture_style}). Output a confirmation message summarizing the number of slides, scripts, and HTML files status. If slides, scripts, or HTML files are missing, invalid, or do not match the expected count ({total_slides}), report the issue clearly. Use 'TERMINATE' to signal completion.
483
+ Example: 'Received {total_slides} slides, {total_slides} scripts, and HTML files. Lecture is coherent and aligns with {lecture_style} style. TERMINATE'
484
+ """)
485
 
486
  swarm = Swarm(
487
+ participants=[research_agent, slide_agent, script_agent, feynman_agent],
488
  termination_condition=HandoffTermination(target="user") | TextMentionTermination("TERMINATE")
489
  )
490
 
 
568
  []
569
  )
570
  await asyncio.sleep(0.1)
571
+ elif source == "script_agent" and message.target == "feynman_agent":
572
  if scripts is None:
573
  logger.warning("Script Agent handoff without scripts JSON")
574
  extracted_json = extract_json_from_message(message)
 
610
  )
611
  task_result.messages.append(retry_message)
612
  continue
613
+ # Generate HTML slides
614
+ html_files = create_slides(slides, title)
615
  if not html_files:
616
  logger.error("Failed to generate HTML slides")
617
  progress = 50
 
668
  task_result.messages.append(retry_message)
669
  continue
670
 
671
+ elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
672
+ logger.info("Feynman Agent completed lecture review: %s", message.content)
673
  progress = 90
674
  label = "Lecture materials ready. Generating lecture speech..."
675
  file_paths = [f for f in os.listdir(OUTPUT_DIR) if f.endswith(('.md', '.txt'))]
 
754
  logger.error("Invalid speaker audio after conversion, skipping TTS")
755
  yield (
756
  f"""
757
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
758
+ <h2 style="color: #d9534f;">Invalid speaker audio</h2>
759
+ <p style="margin-top: 20px;">Please upload a valid MP3 or WAV audio file and try again.</p>
760
  </div>
761
  """,
762
+ []
 
763
  )
764
  return
765
 
 
784
  label = f"Generating lecture speech for slide {i + 1}/{len(scripts)}..."
785
  yield (
786
  html_with_progress(label, progress),
787
+ file_paths
 
788
  )
789
  await asyncio.sleep(0.1)
790
  continue
 
811
  file_paths.append(audio_file)
812
  yield (
813
  html_with_progress(label, progress),
814
+ file_paths
 
815
  )
816
  await asyncio.sleep(0.1)
817
  break
 
825
  label = f"Generating speech for slide {i + 1}/{len(scripts)}..."
826
  yield (
827
  html_with_progress(label, progress),
828
+ file_paths
 
829
  )
830
  await asyncio.sleep(0.1)
831
  break
 
854
  {audio_timeline}
855
  </div>
856
  <div style="display: center; justify-content: center; margin-bottom: 10px;">
857
+ <button id="prev-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: lightgrey"><i class="fas fa-step-backward" style="color: #000"></i></button>
858
+ <button id="play-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: lightgrey"><i class="fas fa-play" style="color: #000"></i></button>
859
+ <button id="next-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: lightgrey"><i class="fas fa-step-forward" style="color: #000"></i></button>
860
+ <button id="fullscreen-btn" style="border-radius: 50%; width: 40px; height: 40px; margin: 0 5px; font-size: 1.2em; cursor: pointer; background-color: lightgrey"><i style="color: #000" class="fas fa-expand"></i></button>
 
861
  </div>
862
  </div>
863
  </div>
864
  """
865
  logger.info("Yielding final lecture materials after audio generation")
 
 
 
 
 
 
 
 
 
866
  yield (
867
  html_output,
868
+ file_paths
 
869
  )
870
 
871
  logger.info("Lecture generation completed successfully")
 
880
  <p style="margin-top: 20px;">Please try again</p>
881
  </div>
882
  """,
883
+ []
 
884
  )
885
  return
886
 
887
+ # custom js for lecture container features
888
  js_code = """
889
  () => {
890
  // Function to wait for an element to appear in the DOM
 
 
 
 
 
 
891
  function waitForElement(selector, callback, maxAttempts = 50, interval = 100) {
892
  let attempts = 0;
893
  const intervalId = setInterval(() => {
 
932
  const totalSlides = lectureData.htmlFiles.length;
933
  let audioElements = [];
934
  let isPlaying = false;
935
+ let hasNavigated = false; // Track if user has used prev/next buttons
 
936
 
937
  // Wait for slide-content element
938
  waitForElement('#slide-content', (slideContent) => {
 
964
  if (body) {
965
  const textLength = body.textContent.length;
966
  const screenWidth = window.innerWidth;
967
+ // Base font size: the min/max clamp below always evaluates to 12px, so screen width currently has no effect
968
+ let baseFontSize = Math.min(12, Math.max(12, 16 * (screenWidth / 1920))); // Scale with screen width (1920px as reference)
969
+ // Adjust inversely with content length
970
+ const adjustedFontSize = Math.max(12, baseFontSize * (1000 / (textLength + 100))); // Minimum 12px, scale down with length
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
971
  const elements = body.getElementsByTagName('*');
972
  for (let elem of elements) {
973
  elem.style.fontSize = `${adjustedFontSize}px`;
974
  }
975
+ console.log(`Adjusted font size to ${adjustedFontSize}px for ${textLength} characters on ${screenWidth}px width`);
 
976
  }
977
  };
978
  });
 
996
  if (audio && audio.pause) {
997
  audio.pause();
998
  audio.currentTime = 0;
999
+ audio.style.border = 'none'; // Reset border
1000
  console.log("Paused and reset audio:", audio.id);
1001
  }
1002
  });
 
1022
 
1023
  function prevSlide() {
1024
  console.log("Previous button clicked, current slide:", currentSlide);
1025
+ hasNavigated = true; // User has navigated
1026
  if (currentSlide > 0) {
1027
  currentSlide--;
1028
  updateSlide(() => {
 
1040
 
1041
  function nextSlide() {
1042
  console.log("Next button clicked, current slide:", currentSlide);
1043
+ hasNavigated = true; // User has navigated
1044
  if (currentSlide < totalSlides - 1) {
1045
  currentSlide++;
1046
  updateSlide(() => {
 
1064
  return;
1065
  }
1066
  const playIcon = playBtn.querySelector('i');
1067
+ if (playIcon.className.includes('fa-pause')) {
 
1068
  // Pause playback
1069
  isPlaying = false;
1070
  audioElements.forEach(audio => {
1071
  if (audio && audio.pause) {
1072
  audio.pause();
1073
+ audio.currentTime = 0;
1074
  audio.style.border = 'none';
1075
  console.log("Paused audio:", audio.id);
1076
  }
 
1078
  playIcon.className = 'fas fa-play';
1079
  return;
1080
  }
 
1081
  // Start playback
1082
+ currentSlide = 0;
1083
+ let index = 0;
1084
  isPlaying = true;
1085
  playIcon.className = 'fas fa-pause';
 
 
 
1086
  updateSlide(() => {
1087
  function playNext() {
1088
+ if (index >= totalSlides || !isPlaying) {
1089
  isPlaying = false;
1090
  playIcon.className = 'fas fa-play';
1091
  audioElements.forEach(audio => {
 
1094
  console.log("Finished playing all slides or paused");
1095
  return;
1096
  }
1097
+ currentSlide = index;
 
1098
  updateSlide(() => {
1099
+ const audio = audioElements[index];
1100
  if (audio && audio.play) {
1101
+ // Highlight the current audio element
1102
  audioElements.forEach(a => a.style.border = 'none');
1103
  audio.style.border = '5px solid #16cd16';
1104
  audio.style.borderRadius = '30px';
1105
+ console.log(`Attempting to play audio for slide ${index + 1}`);
 
1106
  audio.play().then(() => {
1107
+ console.log(`Playing audio for slide ${index + 1}`);
1108
+ // Clear any handler assigned via the onended property (listeners added with addEventListener are not affected)
1109
  audio.onended = null;
1110
  audio.addEventListener('ended', () => {
1111
+ console.log(`Audio ended for slide ${index + 1}`);
1112
+ index++;
1113
+ playNext();
 
 
1114
  }, { once: true });
1115
+ // Fallback: poll once per second and treat the audio as finished when currentTime is within 0.1s of duration, in case 'ended' never fires
1116
  const checkDuration = setInterval(() => {
1117
  if (!isPlaying) {
1118
  clearInterval(checkDuration);
1119
  return;
1120
  }
1121
  if (audio.duration && audio.currentTime >= audio.duration - 0.1) {
1122
+ console.log(`Fallback: Audio for slide ${index + 1} considered ended`);
1123
  clearInterval(checkDuration);
1124
+ audio.onended = null; // Clears the onended property only; a listener registered with addEventListener('ended') can still fire
1125
+ index++;
1126
  playNext();
1127
  }
1128
  }, 1000);
1129
  }).catch(e => {
1130
+ console.error(`Audio play failed for slide ${index + 1}:`, e);
1131
+ // Retry playing the same slide after a short delay
1132
  setTimeout(() => {
1133
+ audio.play().then(() => {
1134
+ console.log(`Retry succeeded for slide ${index + 1}`);
1135
+ audio.onended = null;
1136
+ audio.addEventListener('ended', () => {
1137
+ console.log(`Audio ended for slide ${index + 1}`);
1138
+ index++;
 
 
 
 
 
 
 
 
1139
  playNext();
1140
+ }, { once: true });
1141
+ const checkDuration = setInterval(() => {
1142
+ if (!isPlaying) {
1143
+ clearInterval(checkDuration);
1144
+ return;
1145
+ }
1146
+ if (audio.duration && audio.currentTime >= audio.duration - 0.1) {
1147
+ console.log(`Fallback: Audio for slide ${index + 1} considered ended`);
1148
+ clearInterval(checkDuration);
1149
+ audio.onended = null;
1150
+ index++;
1151
+ playNext();
1152
+ }
1153
+ }, 1000);
1154
+ }).catch(e => {
1155
+ console.error(`Retry failed for slide ${index + 1}:`, e);
1156
+ index++; // Move to next slide if retry fails
1157
+ playNext();
1158
+ });
1159
  }, 500);
1160
  });
1161
  } else {
1162
+ index++;
1163
  playNext();
1164
  }
1165
  });
 
1221
  const lectureContainer = document.getElementById('lecture-container');
1222
  if (lectureContainer) {
1223
  console.log("Lecture container detected in DOM");
1224
+ observer.disconnect(); // Stop observing once found
1225
  initializeSlides();
1226
  }
1227
  }
1228
  });
1229
  });
1230
+
1231
+ // Start observing the document body for changes
1232
  observer.observe(document.body, { childList: true, subtree: true });
1233
  console.log("Started observing DOM for lecture container");
1234
  }
1235
  """
1236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1237
  # Gradio interface
1238
  with gr.Blocks(
1239
+ title="Agent Feynman",
1240
  css="""
1241
+ h1 {text-align: center}
1242
+ h2 {text-align: center}
1243
  #lecture-container {font-family: 'Times New Roman', Times, serif;}
1244
  #slide-content {font-size: 48px; line-height: 1.2;}
1245
+ #form-group {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; font-weight: 900; color: #000; background-color: white;}
1246
  #download {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px;}
 
1247
  #slide-display {box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important; border-radius: 30px; background-color: white;}
 
1248
  button {transition: background-color 0.3s;}
1249
  button:hover {background-color: #e0e0e0;}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1250
  """,
1251
  js=js_code,
1252
  head='<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">'
1253
  ) as demo:
1254
  gr.Markdown("""
1255
+ # <center>Professor AI Feynman: A Multi-Agent Tool for Learning Anything the Feynman way.</center>
1256
+ ## <center>(Jaward Sesay - Microsoft AI Agent Hackathon Submission)</center>""")
 
1257
  with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
1258
  with gr.Column(scale=1):
1259
  with gr.Group(elem_id="form-group"):
1260
  title = gr.Textbox(label="Lecture Title", placeholder="e.g. Introduction to AI")
 
1279
  api_key = gr.Textbox(label="Model Provider API Key", type="password", placeholder="Not required for Ollama or Azure AI Foundry (use GITHUB_TOKEN env var)")
1280
  serpapi_key = gr.Textbox(label="SerpApi Key (For Research Agent)", type="password", placeholder="Enter your SerpApi key (optional)")
1281
  num_slides = gr.Slider(1, 20, step=1, label="Number of Lecture Slides (will add intro and closing slides)", value=3)
1282
+ speaker_audio = gr.Audio(value="feynman.mp3", label="Speaker sample speech (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
 
 
 
 
 
 
1283
  generate_btn = gr.Button("Generate Lecture")
 
 
1284
  with gr.Column(scale=2):
1285
  default_slide_html = """
1286
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 30px; box-shadow: 0 0 2rem rgba(0, 0, 0, .14) !important;">
1287
+ <h2 style="font-style: italic; color: #000;">Waiting for lecture content...</h2>
1288
+ <p style="margin-top: 10px; font-size: 16px;color: #000">Please Generate lecture content via the form on the left first before lecture begins</p>
 
 
 
 
 
 
 
 
1289
  </div>
1290
  """
1291
  slide_display = gr.HTML(label="Lecture Slides", value=default_slide_html, elem_id="slide-display")
 
1292
  file_output = gr.File(label="Download Lecture Materials", elem_id="download")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1293
 
1294
  speaker_audio.change(
1295
  fn=update_audio_preview,
 
1303
  outputs=[slide_display, file_output]
1304
  )
1305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1306
  if __name__ == "__main__":
1307
  demo.launch(allowed_paths=[OUTPUT_DIR])