Jaward committed on
Commit
d76ce22
·
verified ·
1 Parent(s): c8b75a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -43
app.py CHANGED
@@ -5,7 +5,6 @@ import gradio as gr
5
  import asyncio
6
  import logging
7
  import torch
8
- import random
9
  from serpapi import GoogleSearch
10
  from pydantic import BaseModel
11
  from autogen_agentchat.agents import AssistantAgent
@@ -89,8 +88,8 @@ def search_web(query: str, serpapi_key: str) -> str:
89
  def html_with_progress(label, progress):
90
  return f"""
91
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
92
- <div style="width: 100%; background-color: #FFFFFF; border-radius: 10px; overflow: hidden; margin-bottom: 20px;">
93
- <div style="width: {progress}%; height: 30px; background-color: #4CAF50; border-radius: 10px;"></div>
94
  </div>
95
  <h2 style="font-style: italic; color: #555;">{label}</h2>
96
  </div>
@@ -109,7 +108,7 @@ def get_model_client(service, api_key):
109
  else:
110
  raise ValueError("Invalid service")
111
 
112
- # Helper function to clean script text and make it natural
113
  def clean_script_text(script):
114
  if not script or not isinstance(script, str):
115
  logger.error("Invalid script input: %s", script)
@@ -121,20 +120,11 @@ def clean_script_text(script):
121
  script = script.replace("humanlike", "human-like").replace("problemsolving", "problem-solving")
122
  script = re.sub(r"\s+", " ", script).strip()
123
 
124
- script = re.sub(r"^\s*-\s*", "So, ", script, flags=re.MULTILINE)
125
-
126
- non_verbal = ["um, ", "you know, ", "like, "]
127
- words = script.split()
128
- for i in range(len(words) - 1, -1, -1):
129
- if random.random() < 0.1:
130
- words.insert(i, random.choice(non_verbal))
131
- script = " ".join(words)
132
-
133
  if len(script) < 10:
134
  logger.error("Cleaned script too short (%d characters): %s", len(script), script)
135
  return None
136
 
137
- logger.info("Cleaned and naturalized script: %s", script)
138
  return script
139
 
140
  # Helper function to validate and convert speaker audio
@@ -306,8 +296,8 @@ def generate_markdown_slides(slides, title, speaker="Prof. AI Feynman", date="Ap
306
  slide_number = i + 1
307
  content = slide['content']
308
 
309
- # First and last slides have no header/footer
310
- if i == 0 or i == len(slides) - 1:
311
  slide_md = f"""
312
  # {slide['title']}
313
  {content}
@@ -367,8 +357,7 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
367
 
368
  model_client = get_model_client(api_service, api_key)
369
 
370
- actual_content_slides = num_slides
371
- total_slides = actual_content_slides + 3 # Content slides + quiz, assignment, thank-you
372
 
373
  research_agent = AssistantAgent(
374
  name="research_agent",
@@ -382,15 +371,12 @@ async def on_generate(api_service, api_key, serpapi_key, title, topic, instructi
382
  model_client=model_client,
383
  handoffs=["script_agent"],
384
  system_message=f"""
385
- You are a Slide Agent. Using the research from the conversation history and the specified number of content slides ({actual_content_slides}), generate exactly {actual_content_slides} content slides, plus one quiz slide, one assignment slide, and one thank-you slide, for a total of {total_slides} slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. Do not include any explanatory text, comments, or other messages. Ensure the JSON is valid and contains exactly {total_slides} slides before proceeding. After outputting the JSON, use the handoff_to_script_agent tool to pass the task to the Script Agent.
386
- Example output for 2 content slides:
387
  ```json
388
  [
389
  {{"title": "Slide 1", "content": "Content for slide 1"}},
390
- {{"title": "Slide 2", "content": "Content for slide 2"}},
391
- {{"title": "Quiz", "content": "Quiz questions"}},
392
- {{"title": "Assignment", "content": "Assignment details"}},
393
- {{"title": "Thank You", "content": "Thank you message"}}
394
  ]
395
  ```""",
396
  output_content_type=None,
@@ -401,16 +387,13 @@ Example output for 2 content slides:
401
  model_client=model_client,
402
  handoffs=["feynman_agent"],
403
  system_message=f"""
404
- You are a Script Agent. Access the JSON array of {total_slides} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a natural, conversational tone as a speaker would, including occasional non-verbal words (e.g., "um," "you know," "like"). Output ONLY a JSON array wrapped in ```json ... ``` with exactly {total_slides} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
405
- Example for 3 content slides:
406
  ```json
407
  [
408
- "So, this slide, um, covers the main topic in a fun way.",
409
- "The second slide introduces the key concepts.",
410
- "This third slide shows some interesting applications.",
411
- "Alright, you know, answer these quiz questions.",
412
- "Here's your, like, assignment to complete.",
413
- "Thanks for, um, attending today!"
414
  ]
415
  ```""",
416
  output_content_type=None,
@@ -440,10 +423,10 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
440
  Topic: {topic}
441
  Additional Instructions: {instructions}
442
  Audience: {lecture_type}
443
- Number of Content Slides: {actual_content_slides}
444
  Please start by researching the topic.
445
  """
446
- logger.info("Starting lecture generation for topic: %s with %d content slides", topic, actual_content_slides)
447
 
448
  slides = None
449
  scripts = None
@@ -486,7 +469,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
486
  slide_retry_count += 1
487
  logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
488
  retry_message = TextMessage(
489
- content=f"Please generate exactly {total_slides} slides ({actual_content_slides} content slides plus quiz, assignment, thank-you) as per your instructions.",
490
  source="user",
491
  recipient="slide_agent"
492
  )
@@ -526,7 +509,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
526
  slide_retry_count += 1
527
  logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
528
  retry_message = TextMessage(
529
- content=f"Please generate exactly {total_slides} slides ({actual_content_slides} content slides plus quiz, assignment, thank-you) as per your instructions.",
530
  source="user",
531
  recipient="slide_agent"
532
  )
@@ -550,7 +533,7 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
550
  slide_retry_count += 1
551
  logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
552
  retry_message = TextMessage(
553
- content=f"Please generate exactly {total_slides} slides ({actual_content_slides} content slides plus quiz, assignment, thank-you) as per your instructions.",
554
  source="user",
555
  recipient="slide_agent"
556
  )
@@ -614,11 +597,11 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
614
  return
615
 
616
  if len(slides) != total_slides:
617
- logger.error("Expected %d slides (including %d content slides + 3), but received %d", total_slides, actual_content_slides, len(slides))
618
  yield f"""
619
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
620
  <h2 style="color: #d9534f;">Incorrect number of slides</h2>
621
- <p style="margin-top: 20px;">Expected {total_slides} slides ({actual_content_slides} content slides + quiz, assignment, thank-you), but generated {len(slides)}. Please try again.</p>
622
  </div>
623
  """
624
  return
@@ -725,15 +708,15 @@ Example: 'Received {total_slides} slides and {total_slides} scripts. Lecture is
725
  txt_links = ""
726
  for txt_file in txt_files:
727
  file_path = os.path.join(OUTPUT_DIR, txt_file)
728
- txt_links += f'<a href="file/{file_path}" download>{txt_file}</a>&nbsp;&nbsp;'
729
 
730
  # Generate audio timeline
731
  audio_timeline = ""
732
  for i, audio_file in enumerate(audio_files):
733
  if audio_file:
734
- audio_timeline += f'<span id="audio-{i+1}">{os.path.basename(audio_file)}</span>&nbsp;&nbsp;'
735
  else:
736
- audio_timeline += f'<span id="audio-{i+1}">slide_{i+1}.mp3</span>&nbsp;&nbsp;'
737
 
738
  slides_info = json.dumps({"slides": markdown_slides, "audioFiles": audio_files})
739
 
@@ -887,7 +870,7 @@ with gr.Blocks(title="Agent Feynman") as demo:
887
  )
888
  api_key = gr.Textbox(label="Model Provider API Key", type="password", placeholder="Not required for Ollama")
889
  serpapi_key = gr.Textbox(label="SerpApi Key", type="password", placeholder="Enter your SerpApi key")
890
- num_slides = gr.Slider(1, 20, step=1, label="Number of Content Slides", value=3)
891
  speaker_audio = gr.Audio(label="Speaker sample audio (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
892
  generate_btn = gr.Button("Generate Lecture")
893
  with gr.Column(scale=2):
 
5
  import asyncio
6
  import logging
7
  import torch
 
8
  from serpapi import GoogleSearch
9
  from pydantic import BaseModel
10
  from autogen_agentchat.agents import AssistantAgent
 
88
  def html_with_progress(label, progress):
89
  return f"""
90
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
91
+ <div style="width: 70%; background-color: #FFFFFF; border-radius: 80px; overflow: hidden; margin-bottom: 20px;">
92
+ <div style="width: {progress}%; height: 15px; background-color: #4CAF50; border-radius: 80px;"></div>
93
  </div>
94
  <h2 style="font-style: italic; color: #555;">{label}</h2>
95
  </div>
 
108
  else:
109
  raise ValueError("Invalid service")
110
 
111
+ # Helper function to clean script text
112
  def clean_script_text(script):
113
  if not script or not isinstance(script, str):
114
  logger.error("Invalid script input: %s", script)
 
120
  script = script.replace("humanlike", "human-like").replace("problemsolving", "problem-solving")
121
  script = re.sub(r"\s+", " ", script).strip()
122
 
 
 
 
 
 
 
 
 
 
123
  if len(script) < 10:
124
  logger.error("Cleaned script too short (%d characters): %s", len(script), script)
125
  return None
126
 
127
+ logger.info("Cleaned script: %s", script)
128
  return script
129
 
130
  # Helper function to validate and convert speaker audio
 
296
  slide_number = i + 1
297
  content = slide['content']
298
 
299
+ # First slide has no header/footer, others have header and footer
300
+ if i == 0:
301
  slide_md = f"""
302
  # {slide['title']}
303
  {content}
 
357
 
358
  model_client = get_model_client(api_service, api_key)
359
 
360
+ total_slides = num_slides # Use exactly the number of slides from input
 
361
 
362
  research_agent = AssistantAgent(
363
  name="research_agent",
 
371
  model_client=model_client,
372
  handoffs=["script_agent"],
373
  system_message=f"""
374
+ You are a Slide Agent. Using the research from the conversation history and the specified number of slides ({total_slides}), generate exactly {total_slides} content slides. Output ONLY a JSON array wrapped in ```json ... ``` in a TextMessage, where each slide is an object with 'title' and 'content' keys. Do not include any explanatory text, comments, or other messages. Ensure the JSON is valid and contains exactly {total_slides} slides before proceeding. After outputting the JSON, use the handoff_to_script_agent tool to pass the task to the Script Agent.
375
+ Example output for 2 slides:
376
  ```json
377
  [
378
  {{"title": "Slide 1", "content": "Content for slide 1"}},
379
+ {{"title": "Slide 2", "content": "Content for slide 2"}}
 
 
 
380
  ]
381
  ```""",
382
  output_content_type=None,
 
387
  model_client=model_client,
388
  handoffs=["feynman_agent"],
389
  system_message=f"""
390
+ You are a Script Agent. Access the JSON array of {total_slides} slides from the conversation history. Generate a narration script (1-2 sentences) for each of the {total_slides} slides, summarizing its content in a clear, academically inclined tone as a professor would deliver it. Avoid using non-verbal fillers such as "um," "you know," or "like." Output ONLY a JSON array wrapped in ```json ... ``` with exactly {total_slides} strings, one script per slide, in the same order. Ensure the JSON is valid and complete. After outputting, use the handoff_to_feynman_agent tool. If scripts cannot be generated, retry once.
391
+ Example for 3 slides:
392
  ```json
393
  [
394
+ "Hello everyone, welcome to Agents 101. I am Jaward, your primary instructor for this course.",
395
+ "Today, we will cover the syllabus for this semester, providing a gentle introduction to AI agents.",
396
+ "Let us define what an AI agent is: it refers to a system or program capable of autonomously performing tasks on behalf of a user or another system."
 
 
 
397
  ]
398
  ```""",
399
  output_content_type=None,
 
423
  Topic: {topic}
424
  Additional Instructions: {instructions}
425
  Audience: {lecture_type}
426
+ Number of Slides: {total_slides}
427
  Please start by researching the topic.
428
  """
429
+ logger.info("Starting lecture generation for topic: %s with %d slides", topic, total_slides)
430
 
431
  slides = None
432
  scripts = None
 
469
  slide_retry_count += 1
470
  logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
471
  retry_message = TextMessage(
472
+ content=f"Please generate exactly {total_slides} slides as per your instructions.",
473
  source="user",
474
  recipient="slide_agent"
475
  )
 
509
  slide_retry_count += 1
510
  logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
511
  retry_message = TextMessage(
512
+ content=f"Please generate exactly {total_slides} slides as per your instructions.",
513
  source="user",
514
  recipient="slide_agent"
515
  )
 
533
  slide_retry_count += 1
534
  logger.info("Retrying slide generation (attempt %d/%d)", slide_retry_count, max_retries)
535
  retry_message = TextMessage(
536
+ content=f"Please generate exactly {total_slides} slides as per your instructions.",
537
  source="user",
538
  recipient="slide_agent"
539
  )
 
597
  return
598
 
599
  if len(slides) != total_slides:
600
+ logger.error("Expected %d slides, but received %d", total_slides, len(slides))
601
  yield f"""
602
  <div style="display: flex; flex-direction: column; justify-content: center; align-items: center; height: 100%; min-height: 700px; padding: 20px; text-align: center; border: 1px solid #ddd; border-radius: 8px;">
603
  <h2 style="color: #d9534f;">Incorrect number of slides</h2>
604
+ <p style="margin-top: 20px;">Expected {total_slides} slides, but generated {len(slides)}. Please try again.</p>
605
  </div>
606
  """
607
  return
 
708
  txt_links = ""
709
  for txt_file in txt_files:
710
  file_path = os.path.join(OUTPUT_DIR, txt_file)
711
+ txt_links += f'<a href="file/{file_path}" download>{txt_file}</a>  '
712
 
713
  # Generate audio timeline
714
  audio_timeline = ""
715
  for i, audio_file in enumerate(audio_files):
716
  if audio_file:
717
+ audio_timeline += f'<span id="audio-{i+1}">{os.path.basename(audio_file)}</span>  '
718
  else:
719
+ audio_timeline += f'<span id="audio-{i+1}">slide_{i+1}.mp3</span>  '
720
 
721
  slides_info = json.dumps({"slides": markdown_slides, "audioFiles": audio_files})
722
 
 
870
  )
871
  api_key = gr.Textbox(label="Model Provider API Key", type="password", placeholder="Not required for Ollama")
872
  serpapi_key = gr.Textbox(label="SerpApi Key", type="password", placeholder="Enter your SerpApi key")
873
+ num_slides = gr.Slider(1, 20, step=1, label="Number of Slides", value=3)
874
  speaker_audio = gr.Audio(label="Speaker sample audio (MP3 or WAV)", type="filepath", elem_id="speaker-audio")
875
  generate_btn = gr.Button("Generate Lecture")
876
  with gr.Column(scale=2):