Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import gradio as gr
|
|
5 |
import asyncio
|
6 |
import logging
|
7 |
import torch
|
|
|
8 |
from serpapi import GoogleSearch
|
9 |
from pydantic import BaseModel
|
10 |
from autogen_agentchat.agents import AssistantAgent
|
@@ -105,27 +106,36 @@ def get_model_client(service, api_key):
|
|
105 |
else:
|
106 |
raise ValueError("Invalid service")
|
107 |
|
108 |
-
# Helper function to clean script text
|
109 |
def clean_script_text(script):
|
110 |
if not script or not isinstance(script, str):
|
111 |
logger.error("Invalid script input: %s", script)
|
112 |
return None
|
113 |
-
|
114 |
-
|
115 |
-
script = re.sub(r"\
|
116 |
-
script = re.sub(r"
|
117 |
-
script = re.sub(r"
|
118 |
-
script = script.replace("humanlike", "human
|
119 |
-
script = re.sub(r"\s+", " ", script).strip()
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
return None
|
|
|
|
|
129 |
return script
|
130 |
|
131 |
# Helper function to validate and convert speaker audio (MP3 or WAV)
|
@@ -344,11 +354,12 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
|
|
344 |
model_client=model_client,
|
345 |
handoffs=["script_agent"],
|
346 |
system_message=f"""
|
347 |
-
You are a Slide Agent. Using the research from the conversation history and the number of content slides ({num_slides})
|
348 |
-
Example output:
|
349 |
```json
|
350 |
[
|
351 |
{{"title": "Slide 1", "content": "Content for slide 1"}},
|
|
|
352 |
{{"title": "Quiz", "content": "Quiz questions"}},
|
353 |
{{"title": "Assignment", "content": "Assignment details"}},
|
354 |
{{"title": "Thank You", "content": "Thank you message"}}
|
@@ -362,14 +373,14 @@ Example output:
|
|
362 |
model_client=model_client,
|
363 |
handoffs=["feynman_agent"],
|
364 |
system_message=f"""
|
365 |
-
You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each
|
366 |
Example for 1 content slide:
|
367 |
```json
|
368 |
[
|
369 |
-
"
|
370 |
-
"
|
371 |
-
"
|
372 |
-
"
|
373 |
]
|
374 |
```""",
|
375 |
output_content_type=None,
|
@@ -379,8 +390,10 @@ Example for 1 content slide:
|
|
379 |
name="feynman_agent",
|
380 |
model_client=model_client,
|
381 |
handoffs=[],
|
382 |
-
system_message="
|
383 |
-
|
|
|
|
|
384 |
|
385 |
swarm = Swarm(
|
386 |
participants=[research_agent, slide_agent, script_agent, feynman_agent],
|
@@ -521,7 +534,7 @@ Example for 1 content slide:
|
|
521 |
|
522 |
elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
|
523 |
logger.info("Feynman Agent completed lecture review: %s", message.content)
|
524 |
-
progress =
|
525 |
label = "Lecture materials ready. Generating audio..."
|
526 |
yield html_with_progress(label, progress)
|
527 |
await asyncio.sleep(0.1)
|
@@ -544,6 +557,17 @@ Example for 1 content slide:
|
|
544 |
"""
|
545 |
return
|
546 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
547 |
if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
|
548 |
logger.error("Scripts are not a list of strings: %s", scripts)
|
549 |
yield f"""
|
@@ -554,7 +578,7 @@ Example for 1 content slide:
|
|
554 |
"""
|
555 |
return
|
556 |
|
557 |
-
if len(
|
558 |
logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
|
559 |
yield f"""
|
560 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
@@ -598,6 +622,11 @@ Example for 1 content slide:
|
|
598 |
if not cleaned_script:
|
599 |
logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
|
600 |
audio_files.append(None)
|
|
|
|
|
|
|
|
|
|
|
601 |
continue
|
602 |
|
603 |
max_retries = 2
|
@@ -616,11 +645,9 @@ Example for 1 content slide:
|
|
616 |
|
617 |
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
618 |
audio_files.append(audio_file)
|
619 |
-
|
620 |
-
|
621 |
-
|
622 |
-
</div>
|
623 |
-
"""
|
624 |
await asyncio.sleep(0.1)
|
625 |
break
|
626 |
except Exception as e:
|
@@ -628,6 +655,10 @@ Example for 1 content slide:
|
|
628 |
if attempt == max_retries:
|
629 |
logger.error("Max retries reached for slide %d, skipping", i + 1)
|
630 |
audio_files.append(None)
|
|
|
|
|
|
|
|
|
631 |
break
|
632 |
|
633 |
audio_files = [f"file://{os.path.abspath(f)}" if f else None for f in audio_files]
|
|
|
5 |
import asyncio
|
6 |
import logging
|
7 |
import torch
|
8 |
+
import random
|
9 |
from serpapi import GoogleSearch
|
10 |
from pydantic import BaseModel
|
11 |
from autogen_agentchat.agents import AssistantAgent
|
|
|
106 |
else:
|
107 |
raise ValueError("Invalid service")
|
108 |
|
109 |
+
# Helper function to clean script text and make it natural
|
110 |
def clean_script_text(script):
    """Clean a narration script and sprinkle in spoken filler words.

    The script is stripped of slide headers (``**Slide N: ...**``), bracketed
    spans, and ``Title:``/``Content:`` metadata lines; two known run-together
    words are hyphenated; leading ``-`` bullets become the spoken cue
    ``"So, "``; and all whitespace runs are collapsed to single spaces.
    Filler words are then inserted at random positions.

    Args:
        script: The raw script text. Anything that is not a non-empty
            ``str`` is rejected.

    Returns:
        The cleaned script with filler words added, or ``None`` when the
        input is invalid or the cleaned text is shorter than 10 characters.
    """
    if not script or not isinstance(script, str):
        logger.error("Invalid script input: %s", script)
        return None

    # Minimal cleaning to preserve natural language
    script = re.sub(r"\*\*Slide \d+:.*?\*\*", "", script)  # Remove slide headers
    script = re.sub(r"\[.*?\]", "", script)  # Remove bracketed content
    script = re.sub(r"Title:.*?\n|Content:.*?\n", "", script)  # Remove metadata
    script = script.replace("humanlike", "human-like").replace("problemsolving", "problem-solving")

    # Convert bullet points to spoken cues. This must run while the line
    # breaks are still present: once whitespace is collapsed the text is a
    # single line and only a bullet at the very start could ever match.
    script = re.sub(r"^[ \t]*-[ \t]*", "So, ", script, flags=re.MULTILINE)

    script = re.sub(r"\s+", " ", script).strip()  # Normalize whitespace

    # Validate the real content before fillers are added, so acceptance of a
    # script does not depend on how many fillers happened to be inserted.
    if len(script) < 10:
        logger.error("Cleaned script too short (%d characters): %s", len(script), script)
        return None

    # Add non-verbal words randomly (e.g., "um," "you know," "like").
    # Fillers carry no trailing space: the join below supplies the separator,
    # which keeps the output free of double spaces.
    non_verbal = ("um,", "you know,", "like,")
    words = script.split()
    # Walk backwards so an insertion never shifts an index still to be visited.
    for i in range(len(words) - 1, -1, -1):
        if random.random() < 0.1:  # 10% chance per word
            words.insert(i, random.choice(non_verbal))
    script = " ".join(words)

    logger.info("Cleaned and naturalized script: %s", script)
    return script
|
140 |
|
141 |
# Helper function to validate and convert speaker audio (MP3 or WAV)
|
|
|
354 |
model_client=model_client,
|
355 |
handoffs=["script_agent"],
|
356 |
system_message=f"""
|
357 |
+
You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({num_slides}), generate exactly {num_slides} content slides, plus one quiz slide, one assignment slide, and one thank-you slide, for a total of {num_slides + 3} slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. Do not include any explanatory text, comments, or other messages. Ensure the JSON is valid and contains exactly {num_slides + 3} slides before proceeding. After outputting the JSON, use the handoff_to_script_agent tool to pass the task to the Script Agent.
|
358 |
+
Example output for 2 content slides:
|
359 |
```json
|
360 |
[
|
361 |
{{"title": "Slide 1", "content": "Content for slide 1"}},
|
362 |
+
{{"title": "Slide 2", "content": "Content for slide 2"}},
|
363 |
{{"title": "Quiz", "content": "Quiz questions"}},
|
364 |
{{"title": "Assignment", "content": "Assignment details"}},
|
365 |
{{"title": "Thank You", "content": "Thank you message"}}
|
|
|
373 |
model_client=model_client,
|
374 |
handoffs=["feynman_agent"],
|
375 |
system_message=f"""
|
376 |
+
You are a Script Agent. Access the JSON array of {num_slides + 3} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {num_slides + 3} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {num_slides + 3} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
|
377 |
Example for 1 content slide:
|
378 |
```json
|
379 |
[
|
380 |
+
"So, this slide, um, covers the main topic in a fun way.",
|
381 |
+
"Alright, you know, answer these quiz questions.",
|
382 |
+
"Here's your, like, assignment to complete.",
|
383 |
+
"Thanks for, um, attending today!"
|
384 |
]
|
385 |
```""",
|
386 |
output_content_type=None,
|
|
|
390 |
name="feynman_agent",
|
391 |
model_client=model_client,
|
392 |
handoffs=[],
|
393 |
+
system_message=f"""
|
394 |
+
You are Agent Feynman. Review the slides and scripts from the conversation history to ensure coherence, completeness, and that exactly {num_slides + 3} slides and {num_slides + 3} scripts are received. Output a confirmation message summarizing the number of slides and scripts received. If slides or scripts are missing, invalid, or do not match the expected count ({num_slides + 3}), report the issue clearly. Use 'TERMINATE' to signal completion.
|
395 |
+
Example: 'Received {num_slides + 3} slides and {num_slides + 3} scripts. Lecture is coherent. TERMINATE'
|
396 |
+
""")
|
397 |
|
398 |
swarm = Swarm(
|
399 |
participants=[research_agent, slide_agent, script_agent, feynman_agent],
|
|
|
534 |
|
535 |
elif source == "feynman_agent" and isinstance(message, TextMessage) and "TERMINATE" in message.content:
|
536 |
logger.info("Feynman Agent completed lecture review: %s", message.content)
|
537 |
+
progress = 90 # Set to 90% before audio generation
|
538 |
label = "Lecture materials ready. Generating audio..."
|
539 |
yield html_with_progress(label, progress)
|
540 |
await asyncio.sleep(0.1)
|
|
|
557 |
"""
|
558 |
return
|
559 |
|
560 |
+
expected_slide_count = num_slides + 3
|
561 |
+
if len(slides) != expected_slide_count:
|
562 |
+
logger.error("Expected %d slides (including %d content slides + 3), but received %d", expected_slide_count, num_slides, len(slides))
|
563 |
+
yield f"""
|
564 |
+
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
565 |
+
<h2 style="color: #d9534f;">Incorrect number of slides</h2>
|
566 |
+
<p style="margin-top: 20px;">Expected {expected_slide_count} slides ({num_slides} content slides + quiz, assignment, thank-you), but generated {len(slides)}. Please try again.</p>
|
567 |
+
</div>
|
568 |
+
"""
|
569 |
+
return
|
570 |
+
|
571 |
if not isinstance(scripts, list) or not all(isinstance(s, str) for s in scripts):
|
572 |
logger.error("Scripts are not a list of strings: %s", scripts)
|
573 |
yield f"""
|
|
|
578 |
"""
|
579 |
return
|
580 |
|
581 |
+
if len(scripts) != expected_slide_count:
|
582 |
logger.error("Mismatch between number of slides (%d) and scripts (%d)", len(slides), len(scripts))
|
583 |
yield f"""
|
584 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
|
|
622 |
if not cleaned_script:
|
623 |
logger.error("Skipping audio for slide %d due to empty or invalid script", i + 1)
|
624 |
audio_files.append(None)
|
625 |
+
# Update progress (even for skipped slides)
|
626 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10 # Progress from 90% to 100%
|
627 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
628 |
+
yield html_with_progress(label, progress)
|
629 |
+
await asyncio.sleep(0.1)
|
630 |
continue
|
631 |
|
632 |
max_retries = 2
|
|
|
645 |
|
646 |
logger.info("Generated audio for slide %d: %s", i + 1, audio_file)
|
647 |
audio_files.append(audio_file)
|
648 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10 # Progress from 90% to 100%
|
649 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
650 |
+
yield html_with_progress(label, progress)
|
|
|
|
|
651 |
await asyncio.sleep(0.1)
|
652 |
break
|
653 |
except Exception as e:
|
|
|
655 |
if attempt == max_retries:
|
656 |
logger.error("Max retries reached for slide %d, skipping", i + 1)
|
657 |
audio_files.append(None)
|
658 |
+
progress = 90 + ((i + 1) / len(scripts)) * 10 # Progress from 90% to 100%
|
659 |
+
label = f"Generated audio for slide {i + 1}/{len(scripts)}..."
|
660 |
+
yield html_with_progress(label, progress)
|
661 |
+
await asyncio.sleep(0.1)
|
662 |
break
|
663 |
|
664 |
audio_files = [f"file://{os.path.abspath(f)}" if f else None for f in audio_files]
|