fdaudens HF Staff committed on
Commit
72ae2e5
·
1 Parent(s): c57bf0e

add functions to app.py

Browse files
Files changed (3) hide show
  1. app.py +1 -28
  2. run_job.py +10 -1
  3. update-rss.py → update_rss.py +33 -1
app.py CHANGED
@@ -13,6 +13,7 @@ from pathlib import Path
13
  from pydub import AudioSegment # Add this import
14
  import tempfile
15
  import re
 
16
 
17
  import torch
18
  from huggingface_hub import InferenceClient
@@ -75,34 +76,6 @@ def generate_podcast_script(subject: str, steering_question: str | None = None)
75
  podcast_text = sanitize_script(podcast_text)
76
  return podcast_text
77
 
78
- def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
79
- """Ask the LLM for a headline and a short description for the podcast episode."""
80
- prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
81
- 1. A catchy, informative headline for a podcast episode about it (max 15 words).
82
- 2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
83
-
84
- Here is the topic:
85
- {subject[:10000]}
86
- """
87
- messages = [
88
- {"role": "system", "content": "You are a world-class podcast producer."},
89
- {"role": "user", "content": prompt},
90
- ]
91
- response = client.chat_completion(
92
- messages,
93
- max_tokens=512,
94
- )
95
- full_text = response.choices[0].message.content.strip()
96
- # Try to split headline and description
97
- lines = [l.strip() for l in full_text.splitlines() if l.strip()]
98
- if len(lines) >= 2:
99
- headline = lines[0]
100
- description = " ".join(lines[1:])
101
- else:
102
- headline = full_text[:80]
103
- description = full_text
104
- return headline, description
105
-
106
  # -----------------------------------------------------------------------------
107
  # Kokoro TTS
108
  # -----------------------------------------------------------------------------
 
13
  from pydub import AudioSegment # Add this import
14
  import tempfile
15
  import re
16
+ from update_rss import generate_headline_and_description, get_next_episode_number, update_rss
17
 
18
  import torch
19
  from huggingface_hub import InferenceClient
 
76
  podcast_text = sanitize_script(podcast_text)
77
  return podcast_text
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  # -----------------------------------------------------------------------------
80
  # Kokoro TTS
81
  # -----------------------------------------------------------------------------
run_job.py CHANGED
@@ -9,6 +9,7 @@ import json
9
  from datetime import datetime
10
  import os
11
  import tempfile
 
12
 
13
  def submit_job(
14
  inference_provider: str,
@@ -126,11 +127,19 @@ def main():
126
  token=hf_token
127
  )
128
 
 
 
 
129
  # Clean up temporary file
130
  os.unlink(temp_path)
131
 
132
  print(f"Podcast audio uploaded to Space at {space_path}")
133
- print(f"Access URL: https://huggingface.co/spaces/{space_id}/blob/main/{space_path}")
 
 
 
 
 
134
  else:
135
  print("No audio generated.")
136
 
 
9
  from datetime import datetime
10
  import os
11
  import tempfile
12
+ from update_rss import generate_headline_and_description, get_next_episode_number, update_rss
13
 
14
  def submit_job(
15
  inference_provider: str,
 
127
  token=hf_token
128
  )
129
 
130
+ audio_url = f"https://huggingface.co/spaces/{space_id}/blob/main/{space_path}"
131
+ audio_length = os.path.getsize(temp_path)
132
+
133
  # Clean up temporary file
134
  os.unlink(temp_path)
135
 
136
  print(f"Podcast audio uploaded to Space at {space_path}")
137
+ print(f"Access URL: {audio_url}")
138
+
139
+ # After uploading the podcast audio
140
+ # headline, description = generate_headline_and_description(subject)
141
+ # episode_number = get_next_episode_number()
142
+ update_rss(subject, audio_url, audio_length)
143
  else:
144
  print("No audio generated.")
145
 
update-rss.py → update_rss.py RENAMED
@@ -1,8 +1,40 @@
1
  import xml.etree.ElementTree as ET
2
  from datetime import datetime
3
  import os
4
- from app import generate_headline_and_description
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def get_next_episode_number(podcast_dir="podcasts"):
7
  files = [f for f in os.listdir(podcast_dir) if f.endswith(".wav")]
8
  return len(files) + 1
 
1
  import xml.etree.ElementTree as ET
2
  from datetime import datetime
3
  import os
4
+ from huggingface_hub import InferenceClient
5
+ from app import client
6
 
7
+ def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
8
+ """Ask the LLM for a headline and a short description for the podcast episode."""
9
+ prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
10
+ 1. A catchy, informative headline for a podcast episode about it (max 15 words).
11
+ 2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
12
+
13
+ Here is the topic:
14
+ {subject[:10000]}
15
+ """
16
+ messages = [
17
+ {"role": "system", "content": "You are a world-class podcast producer."},
18
+ {"role": "user", "content": prompt},
19
+ ]
20
+ response = client.chat_completion(
21
+ messages,
22
+ max_tokens=512,
23
+ )
24
+ full_text = response.choices[0].message.content.strip()
25
+ # Try to split headline and description
26
+ lines = [l.strip() for l in full_text.splitlines() if l.strip()]
27
+ if len(lines) >= 2:
28
+ headline = lines[0]
29
+ description = " ".join(lines[1:])
30
+ else:
31
+ headline = full_text[:80]
32
+ description = full_text
33
+ return headline, description
34
+
35
+ # -----------------------------------------------------------------------------
36
+ # UPDATE RSS
37
+ # -----------------------------------------------------------------------------
38
  def get_next_episode_number(podcast_dir="podcasts"):
39
  files = [f for f in os.listdir(podcast_dir) if f.endswith(".wav")]
40
  return len(files) + 1