Jaward committed on
Commit
e368dbe
·
verified ·
1 Parent(s): 6de00c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -32
app.py CHANGED
@@ -5,6 +5,7 @@ import gradio as gr
5
  import asyncio
6
  import logging
7
  import torch
 
8
  from serpapi import GoogleSearch
9
  from pydantic import BaseModel
10
  from autogen_agentchat.agents import AssistantAgent
@@ -105,27 +106,36 @@ def get_model_client(service, api_key):
105
  else:
106
  raise ValueError("Invalid service")
107
 
108
- # Helper function to clean script text
109
  def clean_script_text(script):
110
  if not script or not isinstance(script, str):
111
  logger.error("Invalid script input: %s", script)
112
  return None
113
- script = re.sub(r"\*\*Slide \d+:.*?\*\*", "", script)
114
- script = re.sub(r"\*\*|\*|\s*-\s*", "", script)
115
- script = re.sub(r"\[.*?\]", "", script)
116
- script = re.sub(r"Title:.*?\n|Content:.*?\n", "", script)
117
- script = re.sub(r"-", " ", script)
118
- script = script.replace("humanlike", "human like").replace("problemsolving", "problem solving")
119
- script = re.sub(r"\s+", " ", script).strip()
120
- script = re.sub(r"[^\w\s.,!?']", "", script)
121
- if len(script) < 10 or len(script) > 500:
122
- logger.error("Cleaned script length invalid (%d characters): %s", len(script), script)
123
- return None
124
- sentences = re.split(r"[.!?]+", script)
125
- sentences = [s.strip() for s in sentences if s.strip()]
126
- if len(sentences) < 1 or len(sentences) > 5:
127
- logger.error("Cleaned script has invalid sentence count (%d): %s", len(sentences), script)
 
 
 
 
 
 
 
128
  return None
 
 
129
  return script
130
 
131
  # Helper function to validate and convert speaker audio (MP3 or WAV)
@@ -344,11 +354,12 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
344
  model_client=model_client,
345
  handoffs=["script_agent"],
346
  system_message=f"""
347
- You are a Slide Agent. Using the research from the conversation history and the number of content slides ({num_slides}) specified in the initial message, generate exactly {num_slides} content slides, then add a quiz slide, an assignment slide, and a thank-you slide. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. Do not include any explanatory text, comments, or other messages. Ensure the JSON is valid and complete before proceeding. After outputting the JSON, use the handoff_to_script_agent tool to pass the task to the Script Agent.
348
- Example output:
349
  ```json
350
  [
351
  {{"title": "Slide 1", "content": "Content for slide 1"}},
 
352
  {{"title": "Quiz", "content": "Quiz questions"}},
353
  {{"title": "Assignment", "content": "Assignment details"}},
354
  {{"title": "Thank You", "content": "Thank you message"}}
@@ -362,14 +373,14 @@ Example output:
362
  model_client=model_client,
363
  handoffs=["feynman_agent"],
364
  system_message=f"""
365
- You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each slide, summarizing its content. Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
366
  Example for 1 content slide:
367
  ```json
368
  [
369
- "This slide covers the main topic.",
370
- "Answer these quiz questions.",
371
- "Complete this assignment.",
372
- "Thank you for attending."
373
  ]
374
  ```""",
375
  output_content_type=None,
@@ -379,8 +390,10 @@ Example for 1 content slide:
379
  name="feynman_agent",
380
  model_client=model_client,
381
  handoffs=[],
382
- system_message="You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence and completeness. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing or invalid, report the issue clearly. Use 'TERMINATE' to signal completion. Example: 'Received 5 slides and 5 scripts. Lecture is coherent. TERMINATE'"
383
- )
 
 
384
 
385
  swarm = Swarm(
386
  participants=[research_agent, slide_agent, script_agent, feynman_agent],
@@ -521,7 +534,7 @@ Example for 1 content slide:
521
 
522
  elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
523
  logger.info("Feynman Agent completed lecture review: %s", message.content)
524
- progress = 100
525
  label = "Lecture materials ready. Generating audio..."
526
  yield html_with_progress(label, progress)
527
  await asyncio.sleep(0.1)
@@ -544,6 +557,17 @@ Example for 1 content slide:
544
  """
545
  return
546
 
 
 
 
 
 
 
 
 
 
 
 
547
  if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
548
  logger.error("Scripts are not a list of strings: %s", scripts)
549
  yield f"""
@@ -554,7 +578,7 @@ Example for 1 content slide:
554
  """
555
  return
556
 
557
- if len(slides) != len(scripts):
558
  logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
559
  yield f"""
560
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
@@ -598,6 +622,11 @@ Example for 1 content slide:
598
  if not cleaned_script:
599
  logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
600
  audio_files.append(None)
 
 
 
 
 
601
  continue
602
 
603
  max_retries = 2
@@ -616,11 +645,9 @@ Example for 1 content slide:
616
 
617
  logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
618
  audio_files.append(audio_file)
619
- yield f"""
620
- <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
621
- <h2 style="font-style: italic; color: #555;">Generated audio for slide {i + 1}/{len(scripts)}...</h2>
622
- </div>
623
- """
624
  await asyncio.sleep(0.1)
625
  break
626
  except Exception as e:
@@ -628,6 +655,10 @@ Example for 1 content slide:
628
  if attempt == max_retries:
629
  logger.error("Max retries reached for slide %d, skipping", i + 1)
630
  audio_files.append(None)
 
 
 
 
631
  break
632
 
633
  audio_files = [f"file://{os.path.abspath(f)}" if f else None for f in audio_files]
 
5
  import asyncio
6
  import logging
7
  import torch
8
+ import random
9
  from serpapi import GoogleSearch
10
  from pydantic import BaseModel
11
  from autogen_agentchat.agents import AssistantAgent
 
106
  else:
107
  raise ValueError("Invalid service")
108
 
109
# Helper function to clean script text and make it sound natural when spoken
def clean_script_text(script, *, filler_probability=0.1, rng=None):
    """Clean a narration script and sprinkle in spoken filler words.

    The text is stripped of slide headers (``**Slide N: ...**``), bracketed
    content, and ``Title:`` / ``Content:`` metadata lines. Leading ``-``
    bullets are turned into the spoken cue ``"So, "``, whitespace is
    collapsed, and filler words ("um,", "you know,", "like,") are randomly
    inserted in front of words.

    Args:
        script: Raw narration text for a single slide.
        filler_probability: Chance, per word, of inserting a filler in front
            of it. Defaults to 0.1, the value that was previously hard-coded.
        rng: Optional object exposing ``random()`` and ``choice()`` (for
            example a seeded ``random.Random``). Defaults to the ``random``
            module, i.e. the shared global generator.

    Returns:
        The cleaned script with fillers added, or ``None`` when the input is
        empty / not a string or when fewer than 10 characters of real content
        remain after cleaning. Failures are reported through the module-level
        ``logger``.
    """
    if not script or not isinstance(script, str):
        logger.error("Invalid script input: %s", script)
        return None

    # Minimal cleaning to preserve natural language
    script = re.sub(r"\*\*Slide \d+:.*?\*\*", "", script)  # Remove slide headers
    script = re.sub(r"\[.*?\]", "", script)  # Remove bracketed content
    script = re.sub(r"Title:.*?\n|Content:.*?\n", "", script)  # Remove metadata
    script = script.replace("humanlike", "human-like").replace("problemsolving", "problem-solving")

    # Convert bullet points to spoken cues. This has to run while the line
    # breaks still exist: once whitespace is collapsed, "^" can only match
    # the very start of the text and every later bullet would be missed.
    script = re.sub(r"^[ \t]*-[ \t]*", "So, ", script, flags=re.MULTILINE)

    script = re.sub(r"\s+", " ", script).strip()  # Normalize whitespace

    # Validate the real content *before* adding fillers; otherwise randomly
    # inserted fillers could push a too-short script over the length limit.
    if len(script) < 10:
        logger.error("Cleaned script too short (%d characters): %s", len(script), script)
        return None

    # Add non-verbal words randomly (e.g., "um," "you know," "like").
    # Fillers carry no trailing space because the final join supplies it.
    if rng is None:
        rng = random
    non_verbal = ("um,", "you know,", "like,")
    spoken = []
    for word in script.split():
        if rng.random() < filler_probability:
            spoken.append(rng.choice(non_verbal))
        spoken.append(word)
    script = " ".join(spoken)

    logger.info("Cleaned and naturalized script: %s", script)
    return script
140
 
141
  # Helper function to validate and convert speaker audio (MP3 or WAV)
 
354
  model_client=model_client,
355
  handoffs=["script_agent"],
356
  system_message=f"""
357
+ You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({num_slides}), generate exactly {num_slides} content slides, plus one quiz slide, one assignment slide, and one thank-you slide, for a total of {num_slides + 3} slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. Do not include any explanatory text, comments, or other messages. Ensure the JSON is valid and contains exactly {num_slides + 3} slides before proceeding. After outputting the JSON, use the handoff_to_script_agent tool to pass the task to the Script Agent.
358
+ Example output for 2 content slides:
359
  ```json
360
  [
361
  {{"title": "Slide 1", "content": "Content for slide 1"}},
362
+ {{"title": "Slide 2", "content": "Content for slide 2"}},
363
  {{"title": "Quiz", "content": "Quiz questions"}},
364
  {{"title": "Assignment", "content": "Assignment details"}},
365
  {{"title": "Thank You", "content": "Thank you message"}}
 
373
  model_client=model_client,
374
  handoffs=["feynman_agent"],
375
  system_message=f"""
376
+ You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {num_slides + 3} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
377
  Example for 1 content slide:
378
  ```json
379
  [
380
+ "So, this slide, um, covers the main topic in a fun way.",
381
+ "Alright, you know, answer these quiz questions.",
382
+ "Here's your, like, assignment to complete.",
383
+ "Thanks for, um, attending today!"
384
  ]
385
  ```""",
386
  output_content_type=None,
 
390
  name="feynman_agent",
391
  model_client=model_client,
392
  handoffs=[],
393
+ system_message=f"""
394
+ You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {num_slides + 3} slides and {num_slides + 3} scripts are received. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing, invalid, or do not match the expected count ({num_slides + 3}), report the issue clearly. Use 'TERMINATE' to signal completion.
395
+ Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture is coherent. TERMINATE'
396
+ """)
397
 
398
  swarm = Swarm(
399
  participants=[research_agent, slide_agent, script_agent, feynman_agent],
 
534
 
535
  elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
536
  logger.info("Feynman Agent completed lecture review: %s", message.content)
537
+ progress = 90 # Set to 90% before audio generation
538
  label = "Lecture materials ready. Generating audio..."
539
  yield html_with_progress(label, progress)
540
  await asyncio.sleep(0.1)
 
557
  """
558
  return
559
 
560
+ expected_slide_count = num_slides + 3
561
+ if len(slides) != expected_slide_count:
562
+ logger.error("Expected %d slides (including %d content slides + 3), but received %d", expected_slide_count, num_slides, len(slides))
563
+ yield f"""
564
+ <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
565
+ <h2 style="color: #d9534f;">Incorrect number of slides</h2>
566
+ <p style="margin-top: 20px;">Expected {expected_slide_count} slides ({num_slides} content slides + quiz, assignment, thank-you), but generated {len(slides)}. Please try again.</p>
567
+ </div>
568
+ """
569
+ return
570
+
571
  if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
572
  logger.error("Scripts are not a list of strings: %s", scripts)
573
  yield f"""
 
578
  """
579
  return
580
 
581
+ if len(scripts) != expected_slide_count:
582
  logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
583
  yield f"""
584
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
 
622
  if not cleaned_script:
623
  logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
624
  audio_files.append(None)
625
+ # Update progress (even for skipped slides)
626
+ progress = 90 + ((i + 1) / len(scripts)) * 10 # Progress from 90% to 100%
627
+ label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
628
+ yield html_with_progress(label, progress)
629
+ await asyncio.sleep(0.1)
630
  continue
631
 
632
  max_retries = 2
 
645
 
646
  logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
647
  audio_files.append(audio_file)
648
+ progress = 90 + ((i + 1) / len(scripts)) * 10 # Progress from 90% to 100%
649
+ label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
650
+ yield html_with_progress(label, progress)
 
 
651
  await asyncio.sleep(0.1)
652
  break
653
  except Exception as e:
 
655
  if attempt == max_retries:
656
  logger.error("Max retries reached for slide %d, skipping", i + 1)
657
  audio_files.append(None)
658
+ progress = 90 + ((i + 1) / len(scripts)) * 10 # Progress from 90% to 100%
659
+ label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
660
+ yield html_with_progress(label, progress)
661
+ await asyncio.sleep(0.1)
662
  break
663
 
664
  audio_files = [f"file://{os.path.abspath(f)}" if f else None for f in audio_files]