Spaces:

fdaudens
/

podcast-jobs-rss-test

Sleeping

App Files Files Community

fdaudens HF Staff committed on May 14

Commit

72ae2e5

1 Parent(s): c57bf0e

add functions to app.py

Browse files

Files changed (3) hide show

app.py +1 -28
run_job.py +10 -1
update-rss.py → update_rss.py +33 -1

app.py CHANGED Viewed

@@ -13,6 +13,7 @@ from pathlib import Path
 from pydub import AudioSegment  # Add this import
 import tempfile
 import re
 import torch
 from huggingface_hub import InferenceClient
@@ -75,34 +76,6 @@ def generate_podcast_script(subject: str, steering_question: str | None = None)
     podcast_text = sanitize_script(podcast_text)
     return podcast_text
-def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
-    """Ask the LLM for a headline and a short description for the podcast episode."""
-    prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
-1. A catchy, informative headline for a podcast episode about it (max 15 words).
-2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
-Here is the topic:
-{subject[:10000]}
-"""
-    messages = [
-        {"role": "system", "content": "You are a world-class podcast producer."},
-        {"role": "user", "content": prompt},
-    ]
-    response = client.chat_completion(
-        messages,
-        max_tokens=512,
-    )
-    full_text = response.choices[0].message.content.strip()
-    # Try to split headline and description
-    lines = [l.strip() for l in full_text.splitlines() if l.strip()]
-    if len(lines) >= 2:
-        headline = lines[0]
-        description = " ".join(lines[1:])
-    else:
-        headline = full_text[:80]
-        description = full_text
-    return headline, description
 # -----------------------------------------------------------------------------
 # Kokoro TTS
 # -----------------------------------------------------------------------------

 from pydub import AudioSegment  # Add this import
 import tempfile
 import re
+from update_rss import generate_headline_and_description, get_next_episode_number, update_rss
 import torch
 from huggingface_hub import InferenceClient
     podcast_text = sanitize_script(podcast_text)
     return podcast_text
 # -----------------------------------------------------------------------------
 # Kokoro TTS
 # -----------------------------------------------------------------------------

run_job.py CHANGED Viewed

@@ -9,6 +9,7 @@ import json
 from datetime import datetime
 import os
 import tempfile
 def submit_job(
     inference_provider: str,
@@ -126,11 +127,19 @@ def main():
             token=hf_token
         )
         # Clean up temporary file
         os.unlink(temp_path)
         print(f"Podcast audio uploaded to Space at {space_path}")
-        print(f"Access URL: https://huggingface.co/spaces/{space_id}/blob/main/{space_path}")
     else:
         print("No audio generated.")

 from datetime import datetime
 import os
 import tempfile
+from update_rss import generate_headline_and_description, get_next_episode_number, update_rss
 def submit_job(
     inference_provider: str,
             token=hf_token
         )
+        audio_url = f"https://huggingface.co/spaces/{space_id}/blob/main/{space_path}"
+        audio_length = os.path.getsize(temp_path)
         # Clean up temporary file
         os.unlink(temp_path)
         print(f"Podcast audio uploaded to Space at {space_path}")
+        print(f"Access URL: {audio_url}")
+        # After uploading the podcast audio
+        # headline, description = generate_headline_and_description(subject)
+        # episode_number = get_next_episode_number()
+        update_rss(subject, audio_url, audio_length)
     else:
         print("No audio generated.")

update-rss.py → update_rss.py RENAMED Viewed

@@ -1,8 +1,40 @@
 import xml.etree.ElementTree as ET
 from datetime import datetime
 import os
-from app import generate_headline_and_description
 def get_next_episode_number(podcast_dir="podcasts"):
     files = [f for f in os.listdir(podcast_dir) if f.endswith(".wav")]
     return len(files) + 1

 import xml.etree.ElementTree as ET
 from datetime import datetime
 import os
+from huggingface_hub import InferenceClient
+from app import client
+def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
+    """Ask the LLM for a headline and a short description for the podcast episode."""
+    prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
+1. A catchy, informative headline for a podcast episode about it (max 15 words).
+2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
+Here is the topic:
+{subject[:10000]}
+"""
+    messages = [
+        {"role": "system", "content": "You are a world-class podcast producer."},
+        {"role": "user", "content": prompt},
+    ]
+    response = client.chat_completion(
+        messages,
+        max_tokens=512,
+    )
+    full_text = response.choices[0].message.content.strip()
+    # Try to split headline and description
+    lines = [l.strip() for l in full_text.splitlines() if l.strip()]
+    if len(lines) >= 2:
+        headline = lines[0]
+        description = " ".join(lines[1:])
+    else:
+        headline = full_text[:80]
+        description = full_text
+    return headline, description
+# -----------------------------------------------------------------------------
+# UPDATE RSS
+# -----------------------------------------------------------------------------
 def get_next_episode_number(podcast_dir="podcasts"):
     files = [f for f in os.listdir(podcast_dir) if f.endswith(".wav")]
     return len(files) + 1