Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,6 @@ import gradio as gr
|
|
5 |
import asyncio
|
6 |
import logging
|
7 |
import torch
|
8 |
-
import random
|
9 |
from serpapi import GoogleSearch
|
10 |
from pydantic import BaseModel
|
11 |
from autogen_agentchat.agents import AssistantAgent
|
@@ -89,8 +88,8 @@ def search_web(query: str, serpapi_key: str) -> str:
|
|
89 |
def html_with_progress(label, progress):
|
90 |
return f"""
|
91 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
92 |
-
<div style="width:
|
93 |
-
<div style="width: {progress}%; height:
|
94 |
</div>
|
95 |
<h2 style="font-style: italic; color: #555;">{label}</h2>
|
96 |
</div>
|
@@ -109,7 +108,7 @@ def get_model_client(service, api_key):
|
|
109 |
else:
|
110 |
raise ValueError("Invalid service")
|
111 |
|
112 |
-
# Helper function to clean script text
|
113 |
def clean_script_text(script):
|
114 |
if not script or not isinstance(script, str):
|
115 |
logger.error("Invalid script input: %s", script)
|
@@ -121,20 +120,11 @@ def clean_script_text(script):
|
|
121 |
script = script.replace("humanlike", "human-like").replace("problemsolving", "problem-solving")
|
122 |
script = re.sub(r"\s+", " ", script).strip()
|
123 |
|
124 |
-
script = re.sub(r"^\s*-\s*", "So, ", script, flags=re.MULTILINE)
|
125 |
-
|
126 |
-
non_verbal = ["um, ", "you know, ", "like, "]
|
127 |
-
words = script.split()
|
128 |
-
for i in range(len(words) - 1, -1, -1):
|
129 |
-
if random.random() < 0.1:
|
130 |
-
words.insert(i, random.choice(non_verbal))
|
131 |
-
script = " ".join(words)
|
132 |
-
|
133 |
if len(script) < 10:
|
134 |
logger.error("Cleaned script too short (%d characters): %s", len(script), script)
|
135 |
return None
|
136 |
|
137 |
-
logger.info("Cleaned
|
138 |
return script
|
139 |
|
140 |
# Helper function to validate and convert speaker audio
|
@@ -306,8 +296,8 @@ def generate_markdown_slides(slides, title, speaker="Prof. AI Feynman", date="Ap
|
|
306 |
slide_number = i + 1
|
307 |
content = slide['content']
|
308 |
|
309 |
-
# First
|
310 |
-
if i == 0
|
311 |
slide_md = f"""
|
312 |
# {slide['title']}
|
313 |
{content}
|
@@ -367,8 +357,7 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
|
|
367 |
|
368 |
model_client = get_model_client(api_service, api_key)
|
369 |
|
370 |
-
|
371 |
-
total_slides = actual_content_slides + 3 # Content slides + quiz, assignment, thank-you
|
372 |
|
373 |
research_agent = AssistantAgent(
|
374 |
name="research_agent",
|
@@ -382,15 +371,12 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
|
|
382 |
model_client=model_client,
|
383 |
handoffs=["script_agent"],
|
384 |
system_message=f"""
|
385 |
-
You are a Slide Agent. Using the research from the conversation history and the specified number of
|
386 |
-
Example output for 2
|
387 |
```json
|
388 |
[
|
389 |
{{"title": "Slide 1", "content": "Content for slide 1"}},
|
390 |
-
{{"title": "Slide 2", "content": "Content for slide 2"}}
|
391 |
-
{{"title": "Quiz", "content": "Quiz questions"}},
|
392 |
-
{{"title": "Assignment", "content": "Assignment details"}},
|
393 |
-
{{"title": "Thank You", "content": "Thank you message"}}
|
394 |
]
|
395 |
```""",
|
396 |
output_content_type=None,
|
@@ -401,16 +387,13 @@ Example output for 2 content slides:
|
|
401 |
model_client=model_client,
|
402 |
handoffs=["feynman_agent"],
|
403 |
system_message=f"""
|
404 |
-
You are a Script Agent. Access the JSON array of {total_slides} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a
|
405 |
-
Example for 3
|
406 |
```json
|
407 |
[
|
408 |
-
"
|
409 |
-
"
|
410 |
-
"
|
411 |
-
"Alright, you know, answer these quiz questions.",
|
412 |
-
"Here's your, like, assignment to complete.",
|
413 |
-
"Thanks for, um, attending today!"
|
414 |
]
|
415 |
```""",
|
416 |
output_content_type=None,
|
@@ -440,10 +423,10 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
440 |
Topic: {topic}
|
441 |
Additional Instructions: {instructions}
|
442 |
Audience: {lecture_type}
|
443 |
-
Number of
|
444 |
Please start by researching the topic.
|
445 |
"""
|
446 |
-
logger.info("Starting lecture generation for topic: %s with %d
|
447 |
|
448 |
slides = None
|
449 |
scripts = None
|
@@ -486,7 +469,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
486 |
slide_retry_count += 1
|
487 |
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
|
488 |
retry_message = TextMessage(
|
489 |
-
content=f"Please generate exactly {total_slides} slides
|
490 |
source="user",
|
491 |
recipient="slide_agent"
|
492 |
)
|
@@ -526,7 +509,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
526 |
slide_retry_count += 1
|
527 |
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
|
528 |
retry_message = TextMessage(
|
529 |
-
content=f"Please generate exactly {total_slides} slides
|
530 |
source="user",
|
531 |
recipient="slide_agent"
|
532 |
)
|
@@ -550,7 +533,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
550 |
slide_retry_count += 1
|
551 |
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
|
552 |
retry_message = TextMessage(
|
553 |
-
content=f"Please generate exactly {total_slides} slides
|
554 |
source="user",
|
555 |
recipient="slide_agent"
|
556 |
)
|
@@ -614,11 +597,11 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
614 |
return
|
615 |
|
616 |
if len(slides) != total_slides:
|
617 |
-
logger.error("Expected %d slides
|
618 |
yield f"""
|
619 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
620 |
<h2 style="color: #d9534f;">Incorrect number of slides</h2>
|
621 |
-
<p style="margin-top: 20px;">Expected {total_slides} slides
|
622 |
</div>
|
623 |
"""
|
624 |
return
|
@@ -725,15 +708,15 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
|
|
725 |
txt_links = ""
|
726 |
for txt_file in txt_files:
|
727 |
file_path = os.path.join(OUTPUT_DIR, txt_file)
|
728 |
-
txt_links += f'<a href="file/{file_path}" download>{txt_file}</a
|
729 |
|
730 |
# Generate audio timeline
|
731 |
audio_timeline = ""
|
732 |
for i, audio_file in enumerate(audio_files):
|
733 |
if audio_file:
|
734 |
-
audio_timeline += f'<span id="audio-{i+1}">{os.path.basename(audio_file)}</span
|
735 |
else:
|
736 |
-
audio_timeline += f'<span id="audio-{i+1}">slide_{i+1}.mp3</span
|
737 |
|
738 |
slides_info = json.dumps({"slides": markdown_slides, "audioFiles": audio_files})
|
739 |
|
@@ -887,7 +870,7 @@ with gr.Blocks(title="Agent Feynman") as demo:
|
|
887 |
)
|
888 |
api_key = gr.Textbox(label="Model Provider API Key", type="password", placeholder="Not required for Ollama")
|
889 |
serpapi_key = gr.Textbox(label="SerpApi Key", type="password", placeholder="Enter your SerpApi key")
|
890 |
-
num_slides = gr.Slider(1, 20, step=1, label="Number of
|
891 |
speaker_audio = gr.Audio(label="Speaker sample audio (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
|
892 |
generate_btn = gr.Button("Generate Lecture")
|
893 |
with gr.Column(scale=2):
|
|
|
5 |
import asyncio
|
6 |
import logging
|
7 |
import torch
|
|
|
8 |
from serpapi import GoogleSearch
|
9 |
from pydantic import BaseModel
|
10 |
from autogen_agentchat.agents import AssistantAgent
|
|
|
88 |
def html_with_progress(label, progress):
|
89 |
return f"""
|
90 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
91 |
+
<div style="width: 70%; background-color: #FFFFFF; border-radius: 80px; overflow: hidden; margin-bottom: 20px;">
|
92 |
+
<div style="width: {progress}%; height: 15px; background-color: #4CAF50; border-radius: 80px;"></div>
|
93 |
</div>
|
94 |
<h2 style="font-style: italic; color: #555;">{label}</h2>
|
95 |
</div>
|
|
|
108 |
else:
|
109 |
raise ValueError("Invalid service")
|
110 |
|
111 |
+
# Helper function to clean script text
|
112 |
def clean_script_text(script):
|
113 |
if not script or not isinstance(script, str):
|
114 |
logger.error("Invalid script input: %s", script)
|
|
|
120 |
script = script.replace("humanlike", "human-like").replace("problemsolving", "problem-solving")
|
121 |
script = re.sub(r"\s+", " ", script).strip()
|
122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
if len(script) < 10:
|
124 |
logger.error("Cleaned script too short (%d characters): %s", len(script), script)
|
125 |
return None
|
126 |
|
127 |
+
logger.info("Cleaned script: %s", script)
|
128 |
return script
|
129 |
|
130 |
# Helper function to validate and convert speaker audio
|
|
|
296 |
slide_number = i + 1
|
297 |
content = slide['content']
|
298 |
|
299 |
+
# First slide has no header/footer, others have header and footer
|
300 |
+
if i == 0:
|
301 |
slide_md = f"""
|
302 |
# {slide['title']}
|
303 |
{content}
|
|
|
357 |
|
358 |
model_client = get_model_client(api_service, api_key)
|
359 |
|
360 |
+
total_slides = num_slides # Use exactly the number of slides from input
|
|
|
361 |
|
362 |
research_agent = AssistantAgent(
|
363 |
name="research_agent",
|
|
|
371 |
model_client=model_client,
|
372 |
handoffs=["script_agent"],
|
373 |
system_message=f"""
|
374 |
+
You are a Slide Agent. Using the research from the conversation history and the specified number of slides ({total_slides}), generate exactly {total_slides} content slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. Do not include any explanatory text, comments, or other messages. Ensure the JSON is valid and contains exactly {total_slides} slides before proceeding. After outputting the JSON, use the handoff_to_script_agent tool to pass the task to the Script Agent.
|
375 |
+
Example output for 2 slides:
|
376 |
```json
|
377 |
[
|
378 |
{{"title": "Slide 1", "content": "Content for slide 1"}},
|
379 |
+
{{"title": "Slide 2", "content": "Content for slide 2"}}
|
|
|
|
|
|
|
380 |
]
|
381 |
```""",
|
382 |
output_content_type=None,
|
|
|
387 |
model_client=model_client,
|
388 |
handoffs=["feynman_agent"],
|
389 |
system_message=f"""
|
390 |
+
You are a Script Agent. Access the JSON array of {total_slides} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a clear, academically inclined tone as a professor would deliver it. Avoid using non-verbal fillers such as "um," "you know," or "like." Output ONLY a JSON array wrapped in ```json ... ``` with exactly {total_slides} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
|
391 |
+
Example for 3 slides:
|
392 |
```json
|
393 |
[
|
394 |
+
"Hello everyone, welcome to Agents 101. I am Jaward, your primary instructor for this course.",
|
395 |
+
"Today, we will cover the syllabus for this semester, providing a gentle introduction to AI agents.",
|
396 |
+
"Let us define what an AI agent is: it refers to a system or program capable of autonomously performing tasks on behalf of a user or another system."
|
|
|
|
|
|
|
397 |
]
|
398 |
```""",
|
399 |
output_content_type=None,
|
|
|
423 |
Topic: {topic}
|
424 |
Additional Instructions: {instructions}
|
425 |
Audience: {lecture_type}
|
426 |
+
Number of Slides: {total_slides}
|
427 |
Please start by researching the topic.
|
428 |
"""
|
429 |
+
logger.info("Starting lecture generation for topic: %s with %d slides", topic, total_slides)
|
430 |
|
431 |
slides = None
|
432 |
scripts = None
|
|
|
469 |
slide_retry_count += 1
|
470 |
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
|
471 |
retry_message = TextMessage(
|
472 |
+
content=f"Please generate exactly {total_slides} slides as per your instructions.",
|
473 |
source="user",
|
474 |
recipient="slide_agent"
|
475 |
)
|
|
|
509 |
slide_retry_count += 1
|
510 |
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
|
511 |
retry_message = TextMessage(
|
512 |
+
content=f"Please generate exactly {total_slides} slides as per your instructions.",
|
513 |
source="user",
|
514 |
recipient="slide_agent"
|
515 |
)
|
|
|
533 |
slide_retry_count += 1
|
534 |
logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
|
535 |
retry_message = TextMessage(
|
536 |
+
content=f"Please generate exactly {total_slides} slides as per your instructions.",
|
537 |
source="user",
|
538 |
recipient="slide_agent"
|
539 |
)
|
|
|
597 |
return
|
598 |
|
599 |
if len(slides) != total_slides:
|
600 |
+
logger.error("Expected %d slides, but received %d", total_slides, len(slides))
|
601 |
yield f"""
|
602 |
<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
|
603 |
<h2 style="color: #d9534f;">Incorrect number of slides</h2>
|
604 |
+
<p style="margin-top: 20px;">Expected {total_slides} slides, but generated {len(slides)}. Please try again.</p>
|
605 |
</div>
|
606 |
"""
|
607 |
return
|
|
|
708 |
txt_links = ""
|
709 |
for txt_file in txt_files:
|
710 |
file_path = os.path.join(OUTPUT_DIR, txt_file)
|
711 |
+
txt_links += f'<a href="file/{file_path}" download>{txt_file}</a> '
|
712 |
|
713 |
# Generate audio timeline
|
714 |
audio_timeline = ""
|
715 |
for i, audio_file in enumerate(audio_files):
|
716 |
if audio_file:
|
717 |
+
audio_timeline += f'<span id="audio-{i+1}">{os.path.basename(audio_file)}</span> '
|
718 |
else:
|
719 |
+
audio_timeline += f'<span id="audio-{i+1}">slide_{i+1}.mp3</span> '
|
720 |
|
721 |
slides_info = json.dumps({"slides": markdown_slides, "audioFiles": audio_files})
|
722 |
|
|
|
870 |
)
|
871 |
api_key = gr.Textbox(label="Model Provider API Key", type="password", placeholder="Not required for Ollama")
|
872 |
serpapi_key = gr.Textbox(label="SerpApi Key", type="password", placeholder="Enter your SerpApi key")
|
873 |
+
num_slides = gr.Slider(1, 20, step=1, label="Number of Slides", value=3)
|
874 |
speaker_audio = gr.Audio(label="Speaker sample audio (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
|
875 |
generate_btn = gr.Button("Generate Lecture")
|
876 |
with gr.Column(scale=2):
|