fdaudens HF Staff committed on
Commit
547fef1
·
1 Parent(s): fe64cdc

first push

Browse files
Files changed (7) hide show
  1. .DS_Store +0 -0
  2. README.md +10 -6
  3. app.py +144 -0
  4. papers.py +116 -0
  5. prompts.py +56 -0
  6. requirements.txt +8 -0
  7. run_job.py +110 -1
.DS_Store ADDED
Binary file (6.15 kB). View file
 
README.md CHANGED
@@ -1,10 +1,14 @@
1
  ---
2
- title: Podcast Jobs
3
- emoji: 😻
4
- colorFrom: blue
5
- colorTo: green
6
- sdk: docker
7
- pinned: false
 
 
 
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Open NotebookLM
3
+ emoji: 🎙️
4
+ colorFrom: yellow
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 5.26.0
8
+ app_file: app.py
9
+ pinned: true
10
+ license: apache-2.0
11
+ short_description: Generate a podcast to discuss the topic of your choice!
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import queue
2
+ import threading
3
+ import spaces
4
+ import os
5
+ import io
6
+ import soundfile as sf
7
+ import gradio as gr
8
+ import numpy as np
9
+ import time
10
+ import pymupdf
11
+ import requests
12
+ from pathlib import Path
13
+
14
+ import torch
15
+ from huggingface_hub import InferenceClient
16
+ from kokoro import KModel, KPipeline
17
+ # -----------------------------------------------------------------------------
18
+ # Get default podcast materials, from Daily papers and one download
19
+ # -----------------------------------------------------------------------------
20
+ from papers import PaperManager
21
+
22
+ paper_manager = PaperManager()
23
+ top_papers = paper_manager.get_top_content()
24
+
25
+ PODCAST_SUBJECT = list(top_papers.values())[0]
26
+
27
+ # -----------------------------------------------------------------------------
28
+ # LLM that writes the script (unchanged)
29
+ # -----------------------------------------------------------------------------
30
+ from prompts import SYSTEM_PROMPT
31
+
32
+ # client = InferenceClient(
33
+ # "meta-llama/Llama-3.3-70B-Instruct",
34
+ # provider="cerebras",
35
+ # token=os.getenv("HF_TOKEN"),
36
+ # )
37
+ client = InferenceClient(
38
+ "Qwen/Qwen3-32B",
39
+ provider="hf-inference",
40
+ token=os.getenv("HF_TOKEN"),
41
+ )
42
+
43
def generate_podcast_script(subject: str, steering_question: str | None = None) -> str:
    """Ask the LLM for a script of a podcast given by two hosts.

    Args:
        subject: Source material for the episode. Only the first 10,000
            characters are sent to the model.
        steering_question: Optional angle the hosts should focus on. ``None``,
            empty and whitespace-only values are ignored.

    Returns:
        The model output starting at the first ``[JANE]`` marker; everything
        the model wrote before that marker is discarded.

    Raises:
        ValueError: If the model response contains no ``[JANE]`` marker.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {
            "role": "user",
            "content": (
                "Here is the topic: it's the top trending paper on Hugging Face daily papers today. "
                "You will need to analyze it by bringing profound insights.\n"
                f"{subject[:10000]}"
            ),
        },
    ]

    question = (steering_question or "").strip()
    if question:
        messages.append({"role": "user", "content": f"You could focus on this question: {question}"})

    response = client.chat_completion(
        messages,
        max_tokens=8156,
    )
    # The content can be missing; normalise so the marker check below is the
    # single failure path.
    full_text = response.choices[0].message.content or ""

    # Explicit check instead of `assert`: asserts are stripped under `-O`, and
    # a -1 index would silently return only the last character of the reply.
    dialogue_start_index = full_text.find("[JANE]")
    if dialogue_start_index == -1:
        raise ValueError("LLM response does not contain a '[JANE]' line; cannot extract the dialogue.")
    return full_text[dialogue_start_index:]
62
+
63
+ # -----------------------------------------------------------------------------
64
+ # Kokoro TTS
65
+ # -----------------------------------------------------------------------------
66
+ CUDA_AVAILABLE = torch.cuda.is_available()
67
+
68
+ kmodel = KModel(repo_id='hexgrad/Kokoro-82M').to("cuda" if CUDA_AVAILABLE else "cpu").eval()
69
+ kpipeline = KPipeline(lang_code="a") # English voices
70
+
71
+ MALE_VOICE = "am_adam"
72
+ FEMALE_VOICE = "af_heart"
73
+
74
+ # Pre‑warm voices to avoid first‑call latency
75
+ for v in (MALE_VOICE, FEMALE_VOICE):
76
+ kpipeline.load_voice(v)
77
+
78
def _split_speaker(line: str) -> tuple[str, str]:
    """Return ``(voice_name, utterance)`` for one stripped script line."""
    for tag, voice in (("[MIKE]", MALE_VOICE), ("[JANE]", FEMALE_VOICE)):
        if line.startswith(tag):
            return voice, line[len(tag):].strip()
    # Untagged lines fall back to the female voice and are spoken as-is.
    return FEMALE_VOICE, line


@spaces.GPU
def generate_podcast(topic: str):
    """Stream a podcast about the default paper as audio chunks.

    Args:
        topic: Optional steering question forwarded to the script generator.

    Yields:
        ``(sample_rate, audio)`` tuples, one per chunk produced by the Kokoro
        pipeline, where ``audio`` is the NumPy array returned by the model.
    """
    material_text = PODCAST_SUBJECT

    # Generate podcast script!
    podcast_script = generate_podcast_script(material_text, topic)

    # Strip each line so a tag preceded by whitespace is still recognised.
    lines = [ln.strip() for ln in podcast_script.splitlines() if ln.strip()]

    voice_packs = {
        FEMALE_VOICE: kpipeline.load_voice(FEMALE_VOICE),
        MALE_VOICE: kpipeline.load_voice(MALE_VOICE),
    }

    speed = 1.
    sr = 24000

    for line in lines:
        voice, utterance = _split_speaker(line)
        if not utterance:
            # A bare "[JANE]" / "[MIKE]" tag carries nothing to synthesise.
            continue
        voice_pack = voice_packs[voice]

        for _, ps, _ in kpipeline(utterance, voice, speed):
            t0 = time.perf_counter()
            ref_s = voice_pack[len(ps) - 1]
            audio_numpy = kmodel(ps, ref_s, speed).numpy()
            # Stop the clock before yielding: a generator is suspended at
            # `yield`, so timing across it would measure the consumer instead.
            elapsed = time.perf_counter() - t0
            print(f"PROCESSED '{utterance}' in {elapsed:.2f} seconds. {audio_numpy.shape}")
            yield (sr, audio_numpy)
115
+
116
+ EXAMPLES = [
117
+ ["https://huggingface.co/blog/inference-providers-cohere", None, "How does using this compare with other inference solutions?"],
118
+ [None, str(Path("examples/Essay_Palantir.pdf")), "Make sure to keep some critic spirit in the analysis!"],
119
+ ]
120
# Gradio UI: a single optional text box in, streamed WAV audio out.
# The description names the model the `InferenceClient` above is actually
# configured with (Qwen/Qwen3-32B via hf-inference) and uses an absolute link.
demo = gr.Interface(
    title="Daily Paper Podcast 🎙️",
    description=f"""Generates a podcast discussion between two hosts about today's top trending paper on Hugging Face: '**{list(top_papers.keys())[0]}**'

Based on [Kokoro TTS](https://huggingface.co/hexgrad/Kokoro-82M) and [Qwen3-32B](https://huggingface.co/Qwen/Qwen3-32B) served through HF Inference.""",
    fn=generate_podcast,
    inputs=[
        gr.Textbox(
            label="🤔 Do you have a specific aspect of the paper you'd like the hosts to focus on?",
            placeholder="You can leave this blank for a general discussion.",
        ),
    ],
    outputs=[
        gr.Audio(
            label="Listen to your podcast! 🔊",
            format="wav",
            streaming=True,
        ),
    ],
    theme=gr.themes.Soft(),
    submit_btn="Generate podcast 🎙️",
)
142
+
143
+ if __name__ == "__main__":
144
+ demo.launch()
papers.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import tempfile
4
+ from datetime import datetime, timezone
5
+ import base64
6
+ from tqdm.auto import tqdm
7
+ import pymupdf
8
+
9
+ DAILY_PAPERS_API_URL = "https://huggingface.co/api/daily_papers"
10
+
11
+ class PaperManager:
12
+ def __init__(self, papers_per_page=30):
13
+ self.papers = []
14
+ self.raw_papers = [] # To store fetched data
15
+
16
+ def calculate_rising_score(self, paper):
17
+ """
18
+ Calculate the rising score of a paper.
19
+ This emphasizes recent upvotes and the rate of upvote accumulation.
20
+ """
21
+ upvotes = paper.get('paper', {}).get('upvotes', 0)
22
+ published_at_str = paper.get('publishedAt', datetime.now(timezone.utc).isoformat())
23
+ try:
24
+ published_time = datetime.fromisoformat(published_at_str.replace('Z', '+00:00'))
25
+ except ValueError:
26
+ published_time = datetime.now(timezone.utc)
27
+
28
+ time_diff = datetime.now(timezone.utc) - published_time
29
+ time_diff_hours = time_diff.total_seconds() / 3600 # Convert time difference to hours
30
+
31
+ # Rising score favors papers that are gaining upvotes quickly
32
+ # Adjusted to have a linear decay over time
33
+ score = upvotes / (time_diff_hours + 1)
34
+ return score
35
+
36
+ def fetch_papers(self):
37
+ try:
38
+ response = requests.get(f"{DAILY_PAPERS_API_URL}?limit=100")
39
+ response.raise_for_status()
40
+ data = response.json()
41
+
42
+ if not data:
43
+ print("No data received from API.")
44
+ return False
45
+
46
+ self.raw_papers = data # Store raw data
47
+
48
+ return True
49
+
50
+ except requests.RequestException as e:
51
+ print(f"Error fetching papers: {e}")
52
+ return False
53
+ except Exception as e:
54
+ print(f"Unexpected error: {e}")
55
+ return False
56
+
57
+ def filter_top_papers(self, threshold_general=2.0, threshold_agent=0.7):
58
+ self.papers = []
59
+ for paper in self.raw_papers:
60
+ paper_score = self.calculate_rising_score(paper)
61
+ # if paper_score >= threshold_general or ('agent' in paper['title'].lower() and paper_score >= threshold_agent):
62
+ self.papers.append(paper)
63
+
64
+ self.papers = sorted(
65
+ self.papers,
66
+ key=lambda x: self.calculate_rising_score(x) * (3 if 'agent' in x['title'].lower() else 1),
67
+ reverse=True
68
+ )[:2]
69
+ return self.papers
70
+
71
+ # def get_paper_content(self, paper_id):
72
+ # pdf_url = f"https://arxiv.org/pdf/{paper_id}.pdf"
73
+ # print("Processing paper:", pdf_url)
74
+ # client = httpx.Client(follow_redirects=True)
75
+ # response = client.get(pdf_url)
76
+
77
+ # # First verification - check if we got a valid PDF response
78
+ # if response.status_code != 200:
79
+ # raise Exception(f"Failed to fetch PDF: {response.status_code}")
80
+
81
+ # if not response.headers.get('content-type', '').startswith('application/pdf'):
82
+ # raise Exception(f"Unexpected content type: {response.headers.get('content-type')}")
83
+
84
+ # # Second verification - check the first few bytes of the content
85
+ # if not response.content.startswith(b'%PDF'):
86
+ # raise Exception("Content doesn't appear to be a valid PDF")
87
+
88
+ # pdf_data = base64.standard_b64encode(response.content).decode("utf-8")
89
+ # return {"pdf": pdf_data, "url": pdf_url}
90
+
91
+ def get_paper_text(self, paper_id):
92
+ url = f"https://arxiv.org/pdf/{paper_id}.pdf"
93
+ response = requests.get(url)
94
+
95
+ if response.status_code != 200:
96
+ raise Exception(f"Failed to download PDF: {response.status_code}")
97
+
98
+ with open("temp.pdf", "wb") as f:
99
+ f.write(response.content)
100
+
101
+ with pymupdf.open("temp.pdf") as doc:
102
+ text = ""
103
+ for page in doc:
104
+ text += page.get_text()
105
+ return text
106
+
107
+
108
+ def get_top_content(self):
109
+ self.fetch_papers()
110
+ self.filter_top_papers()
111
+ contents = {}
112
+ print(f"Processing {len(self.papers)} papers:")
113
+ for paper in tqdm(self.papers):
114
+ paper_id = paper["paper"]['id']
115
+ contents[paper["paper"]['title']] = self.get_paper_text(paper_id)
116
+ return contents
prompts.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # System prompt taken from the great space by Gabriel Chua: https://huggingface.co/spaces/gabrielchua/open-notebooklm/blob/main/prompts.py
2
+
3
+ SYSTEM_PROMPT = """
4
+ You are a world-class podcast producer tasked with transforming the provided input text into an engaging and informative podcast script. The input may be unstructured or messy, sourced from PDFs or web pages. Your goal is to extract the most interesting and insightful content for a compelling podcast discussion.
5
+ # Steps to Follow:
6
+
7
+ ### 1. Analyze the Input:
8
+ Carefully examine the text, identifying key topics, points, and interesting facts or anecdotes that could drive an engaging podcast conversation. Disregard irrelevant information or formatting issues.
9
+ DO this under the <analysis> part
10
+
11
+ ### 2. Brainstorm Ideas:
12
+ In the <scratchpad> part, creatively brainstorm ways to present the key points engagingly. Consider:
13
+ - Analogies, storytelling techniques, or hypothetical scenarios to make content relatable
14
+ - Ways to make complex topics accessible to a general audience
15
+ - Thought-provoking questions to explore during the podcast
16
+ - Creative approaches to fill any gaps in the information
17
+
18
+ ### 3. Craft the Dialogue:
19
+ Develop a natural, conversational flow between the two hosts named Jane and Mike. Incorporate:
20
+ - The best ideas from your brainstorming session
21
+ - Clear explanations of complex topics
22
+ - An engaging and lively tone to captivate listeners. Learning should be fun!
23
+ - A balance of information and entertainment
24
+ Rules for the dialogue:
25
+ - The female host (Jane) always initiates the conversation and interviews the guest
26
+ - Include thoughtful questions from the host to guide the discussion
27
+ - Incorporate natural speech patterns, including occasional verbal fillers (e.g., "um," "well," "you know")
28
+ - Allow for natural interruptions and back-and-forth between host and guest
29
+ - Ensure the guest's responses are substantiated by the input text, avoiding unsupported claims
30
+ - Maintain a PG-rated conversation appropriate for all audiences
31
+ - The host concludes the conversation
32
+ **Summarize Key Insights:**
33
+ Naturally weave a summary of key points into the closing part of the dialogue. This should feel like a casual conversation rather than a formal recap, reinforcing the main takeaways before signing off.
34
+ **Maintain Authenticity:**
35
+ Throughout the script, strive for authenticity in the conversation. Include:
36
+ - Moments of genuine curiosity or surprise from the host
37
+ - Instances where one of the hosts might briefly struggle to articulate a complex idea
38
+ - Light-hearted moments or humor when appropriate
39
+ **Consider Pacing and Structure:
40
+ Ensure the dialogue has a natural ebb and flow:
41
+ - Start with a strong hook to grab the listener's attention
42
+ - Gradually build complexity as the conversation progresses
43
+ - Include brief "breather" moments for listeners to absorb complex information
44
+ - End on a high note, perhaps with a thought-provoking question or a call-to-action for listeners
45
+
46
+ TONE: The tone of the podcast should be casual.
47
+
48
+ DURATION: Aim for a moderate length, about 3-5 minutes.
49
+
50
+ IMPORTANT RULE: Each line of dialogue should go in a new line [JANE] or [MIKE], as follows:
51
+
52
+ [JANE] Hello Mike, how are you?
53
+ [MIKE] Nice to see you again, Jane. I'm very good. Today's topic is fascinating, because...
54
+
55
+ Remember: Each turn from a host should be on the same line.
56
+ """
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ kokoro
2
+ huggingface_hub
3
+ transformers
4
+ PyMuPDF
5
+ soundfile
6
+ numpy
7
+ requests
8
+ json
run_job.py CHANGED
@@ -1 +1,110 @@
1
- print("Hello, world!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from papers import PaperManager
2
+ from app import generate_podcast_script, kmodel, kpipeline, MALE_VOICE, FEMALE_VOICE
3
+ import soundfile as sf
4
+ import numpy as np
5
+ import argparse
6
+ from huggingface_hub import HfApi
7
+ import requests
8
+ import json
9
+
10
+ # topics = [folder for folder in os.listdir("podcasts") if os.path.isdir(os.path.join("podcasts", folder))]
11
+ podcasts = {}
12
+
13
+ # for topic in topics:
14
+ # topic_path = os.path.join("podcasts", topic)
15
+ # podcasts[topic] = sorted([f.replace(".md", "") for f in os.listdir(topic_path) if f.endswith(".md")], reverse=True)
16
+
17
def submit_job(
    repo_id: str,
    inference_provider: str,
    hf_token: str
):
    """Launch ``run_job.py`` as a Hugging Face job and return the raw API reply.

    Args:
        repo_id: Currently ignored; the target repo is always derived from the
            token owner's username (``<username>/news-podcasts``).
        inference_provider: Forwarded to the job as ``--provider``.
        hf_token: Token used both to call the jobs API and inside the job.

    Returns:
        The response body as text, whatever the HTTP status was.
    """
    # Configuration variables
    username = HfApi(token=hf_token).whoami()["name"]  # Your HuggingFace username
    space_id = "fdaudens/podcast-jobs"  # Your space ID
    # If you want to always use the username-based repo_id, remove repo_id from parameters
    repo_id = f"{username}/news-podcasts"
    flavor = "cpu-basic"  # Machine type

    # Create the API request
    url = f"https://huggingface.co/api/jobs/{username}"
    headers = {"Authorization": f"Bearer {hf_token}"}

    payload = {
        "spaceId": space_id,
        "command": ["python", "run_job.py"],
        "arguments": [
            "--provider", inference_provider,
            "--repo-id", repo_id,
        ],
        "environment": {
            # Kept for backward compatibility with anything reading this name.
            "HF_API_KEY": hf_token,
            # app.py builds its InferenceClient from os.getenv("HF_TOKEN").
            "HF_TOKEN": hf_token,
        },
        "flavor": flavor,
    }

    # Launch the job; `json=` serialises the payload and sets Content-Type.
    # The timeout keeps an unresponsive endpoint from hanging the caller.
    response = requests.post(url, headers=headers, json=payload, timeout=30)
    return response.text
52
+
53
def main():
    """Generate a podcast for the top daily paper and save it as ``podcast.wav``.

    ``--provider``, ``--repo-id`` and ``--flavor`` are parsed and echoed, but
    nothing below consumes them yet.

    Raises:
        SystemExit: If no paper content could be retrieved.
    """
    parser = argparse.ArgumentParser(description="Podcast job runner")
    parser.add_argument("--provider", type=str, default="hf-inference")
    parser.add_argument("--repo-id", type=str, default="fdaudens/news-podcasts")
    parser.add_argument("--flavor", type=str, default="t4-medium")
    args = parser.parse_args()

    print(f"Arguments: provider={args.provider}, repo_id={args.repo_id}, flavor={args.flavor}")

    # 1. Get the most popular paper's content
    paper_manager = PaperManager()
    top_papers = paper_manager.get_top_content()
    if not top_papers:
        # Fail with a readable message rather than an IndexError traceback.
        raise SystemExit("No papers could be retrieved; cannot generate a podcast.")
    # Get the first (most popular) paper's text
    subject = next(iter(top_papers.values()))

    # 2. Generate the podcast script
    podcast_script = generate_podcast_script(subject)

    # 3. Synthesize the podcast audio
    # Strip each line so a tag preceded by whitespace is still recognised.
    lines = [ln.strip() for ln in podcast_script.splitlines() if ln.strip()]
    sr = 24000
    speed = 1.0
    audio_segments = []

    speakers = (
        ("[MIKE]", MALE_VOICE, kpipeline.load_voice(MALE_VOICE)),
        ("[JANE]", FEMALE_VOICE, kpipeline.load_voice(FEMALE_VOICE)),
    )
    # Untagged lines fall back to the female voice.
    fallback_voice, fallback_pack = speakers[1][1], speakers[1][2]

    for line in lines:
        voice, voice_pack, utterance = fallback_voice, fallback_pack, line
        for tag, tagged_voice, tagged_pack in speakers:
            if line.startswith(tag):
                voice, voice_pack = tagged_voice, tagged_pack
                utterance = line[len(tag):].strip()
                break
        if not utterance:
            # A bare speaker tag carries nothing to synthesise.
            continue

        for _, ps, _ in kpipeline(utterance, voice, speed):
            ref_s = voice_pack[len(ps) - 1]
            audio_segments.append(kmodel(ps, ref_s, speed).numpy())

    # Concatenate all audio segments
    if audio_segments:
        full_audio = np.concatenate(audio_segments)
        # 4. Save as WAV file
        sf.write("podcast.wav", full_audio, sr)
        print("Podcast audio saved as podcast.wav")
    else:
        print("No audio generated.")
108
+
109
+ if __name__ == "__main__":
110
+ main()