Simplify YouTube functionality, remove YouTube API
Browse files- app.py +90 -113
- requirements.txt +0 -3
- requirements_local.txt +4 -3
- youtube_api.py +0 -265
app.py
CHANGED
@@ -14,7 +14,8 @@ import traceback # For printing full errors
|
|
14 |
import platform
|
15 |
import re
|
16 |
import subprocess
|
17 |
-
|
|
|
18 |
|
19 |
# --- 硬體檢查函數 ---
|
20 |
def get_hardware_info():
|
@@ -140,13 +141,8 @@ def update_download_file(filepath):
|
|
140 |
return None
|
141 |
|
142 |
# --- YouTube 音訊處理 ---
|
143 |
-
def process_youtube_url(youtube_url
|
144 |
-
"""處理 YouTube URL,下載音訊並返回播放器和下載按鈕的更新
|
145 |
-
|
146 |
-
Args:
|
147 |
-
youtube_url: YouTube 視頻 URL
|
148 |
-
user_api_key: 用戶輸入的 API 金鑰(可選)
|
149 |
-
"""
|
150 |
if not youtube_url or not youtube_url.strip():
|
151 |
return gr.update(visible=False, value=None), gr.update(visible=False, value=None)
|
152 |
|
@@ -154,51 +150,80 @@ def process_youtube_url(youtube_url, user_api_key=None):
|
|
154 |
import os
|
155 |
is_spaces = os.environ.get("SPACE_ID") is not None
|
156 |
|
157 |
-
# 優先使用用戶輸入的 API 金鑰,如果沒有則使用環境變量中的 API 金鑰
|
158 |
-
youtube_api_key = user_api_key if user_api_key and user_api_key.strip() else os.environ.get("YOUTUBE_API_KEY")
|
159 |
-
|
160 |
try:
|
161 |
-
# 如果有 API 金鑰,設置它
|
162 |
-
if youtube_api_key:
|
163 |
-
youtube_api.set_api_key(youtube_api_key)
|
164 |
-
print(f"Using YouTube Data API with {'user-provided' if user_api_key else 'environment'} API key")
|
165 |
-
else:
|
166 |
-
print("No YouTube API key found, falling back to direct download")
|
167 |
-
|
168 |
# 處理 YouTube URL
|
169 |
print(f"Processing YouTube URL: {youtube_url}")
|
170 |
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
# 在 Spaces 環境中且沒有 API 金鑰,顯示警告(但不是在啟動時)
|
176 |
-
print("Warning: YouTube download is not supported in Hugging Face Spaces without an API key.")
|
177 |
-
raise gr.Error("YouTube 下載在 Hugging Face Spaces 中需要 API 金鑰。請在上方的 'YouTube API Key Settings' 中輸入您的 API 金鑰。\n\nYouTube download in Hugging Face Spaces requires an API key. Please enter your API key in the 'YouTube API Key Settings' section above.")
|
178 |
|
179 |
-
# 使用
|
180 |
-
audio_path,
|
181 |
|
182 |
if audio_path and os.path.exists(audio_path):
|
183 |
-
# 如果有視頻信息,顯示它
|
184 |
-
if video_info:
|
185 |
-
title = video_info.get("title", "Unknown")
|
186 |
-
duration = video_info.get("duration", "Unknown")
|
187 |
-
print(f"Title: {title}, Duration: {duration}s")
|
188 |
-
|
189 |
# 返回音訊播放器和下載按鈕的更新
|
190 |
return gr.update(visible=True, value=audio_path), gr.update(visible=True, value=audio_path)
|
191 |
else:
|
192 |
return gr.update(visible=False, value=None), gr.update(visible=False, value=None)
|
193 |
except Exception as e:
|
194 |
print(f"Error processing YouTube URL: {e}")
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
|
203 |
# --- Load ASR Pipeline ---
|
204 |
def load_asr_pipeline(model_id):
|
@@ -337,7 +362,7 @@ def load_phi4_model(model_id):
|
|
337 |
def transcribe_audio(mic_input, file_input, youtube_url, selected_model_identifier,
|
338 |
task, language, return_timestamps,
|
339 |
phi4_prompt_text, device_choice,
|
340 |
-
previous_output_text, active_tab
|
341 |
global pipe, phi4_model, phi4_processor, current_model_name, current_device
|
342 |
audio_source = None
|
343 |
source_type_en = ""
|
@@ -444,29 +469,20 @@ def transcribe_audio(mic_input, file_input, youtube_url, selected_model_identifi
|
|
444 |
status_update_str = f"Downloading YouTube Audio / 正在下載 YouTube 音訊..."
|
445 |
output_text_accumulated = status_update_prefix + status_update_str
|
446 |
|
447 |
-
#
|
448 |
-
|
|
|
|
|
|
|
449 |
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
|
458 |
-
audio_duration = video_info["duration"]
|
459 |
-
if video_info.get("title"):
|
460 |
-
print(f"Processing YouTube video: {video_info['title']}")
|
461 |
-
else:
|
462 |
-
# 如果沒有時長信息,稍後會嘗試從音頻文件獲取
|
463 |
-
audio_duration = None
|
464 |
-
else:
|
465 |
-
output_text_accumulated = status_update_prefix + "Error: Failed to download YouTube audio. / 錯誤:無法下載 YouTube 音訊。"
|
466 |
-
return (output_text_accumulated, gr.update(), gr.update(), gr.update())
|
467 |
-
except Exception as e:
|
468 |
-
error_message = str(e)
|
469 |
-
output_text_accumulated = status_update_prefix + f"Error: {error_message}"
|
470 |
return (output_text_accumulated, gr.update(), gr.update(), gr.update())
|
471 |
else:
|
472 |
# 如果沒有選擇任何輸入源或當前標籤沒有有效輸入
|
@@ -810,52 +826,15 @@ with gr.Blocks(css=compact_css, theme=gr.themes.Default(spacing_size=gr.themes.s
|
|
810 |
|
811 |
# 如果在 Spaces 環境中,顯示警告訊息
|
812 |
if is_spaces:
|
813 |
-
# 檢查是否有 API 金鑰
|
814 |
-
youtube_api_key = os.environ.get("YOUTUBE_API_KEY")
|
815 |
-
if youtube_api_key:
|
816 |
-
gr.Markdown("""
|
817 |
-
ℹ️ **注意:YouTube 下載使用 YouTube Data API**
|
818 |
-
|
819 |
-
在 Hugging Face Spaces 中,YouTube 下載功能使用 YouTube Data API。這提供了更穩定的體驗。
|
820 |
-
|
821 |
-
ℹ️ **Note: YouTube download uses YouTube Data API**
|
822 |
-
|
823 |
-
In Hugging Face Spaces, YouTube download functionality uses the YouTube Data API. This provides a more stable experience.
|
824 |
-
""", elem_classes="info-box")
|
825 |
-
else:
|
826 |
-
gr.Markdown("""
|
827 |
-
⚠️ **注意:YouTube 下載在 Hugging Face Spaces 中需要 API 金鑰**
|
828 |
-
|
829 |
-
由於安全限制,Spaces 環境需要 YouTube Data API 金鑰才能下載 YouTube 視頻。請在環境變量中設置 YOUTUBE_API_KEY,或在本地環境中使用此功能。
|
830 |
-
|
831 |
-
⚠️ **Note: YouTube download in Hugging Face Spaces requires an API key**
|
832 |
-
|
833 |
-
Due to security restrictions, Spaces environment requires a YouTube Data API key to download YouTube videos. Please set YOUTUBE_API_KEY in environment variables, or use this feature in a local environment.
|
834 |
-
""", elem_classes="warning-box")
|
835 |
-
|
836 |
-
# API 金鑰輸入框
|
837 |
-
with gr.Accordion("YouTube API Key Settings / YouTube API 金鑰設置", open=False):
|
838 |
gr.Markdown("""
|
839 |
-
|
840 |
|
841 |
-
|
842 |
|
843 |
-
|
844 |
|
845 |
-
|
846 |
-
|
847 |
-
2. 創建一個新項目(或選擇現有項目)/ Create a new project (or select an existing one)
|
848 |
-
3. 啟用 YouTube Data API v3 / Enable YouTube Data API v3
|
849 |
-
4. 創建 API 金鑰 / Create an API key
|
850 |
-
5. 將 API 金鑰複製到下方輸入框 / Copy the API key to the input box below
|
851 |
-
|
852 |
-
**注意 / Note:** API 金鑰僅在當前會話中有效,頁面刷新後需要重新輸入。/ The API key is only valid for the current session and needs to be re-entered after page refresh.
|
853 |
-
""")
|
854 |
-
youtube_api_key_input = gr.Textbox(
|
855 |
-
label="YouTube API Key / YouTube API 金鑰",
|
856 |
-
placeholder="Enter your YouTube API key here / 在此輸入您的 YouTube API 金鑰",
|
857 |
-
type="password"
|
858 |
-
)
|
859 |
|
860 |
# YouTube URL 輸入框
|
861 |
youtube_input = gr.Textbox(label="YouTube URL / 網址", placeholder="Paste YouTube link here / 在此貼上 YouTube 連結")
|
@@ -872,17 +851,15 @@ with gr.Blocks(css=compact_css, theme=gr.themes.Default(spacing_size=gr.themes.s
|
|
872 |
is_spaces = os.environ.get("SPACE_ID") is not None
|
873 |
|
874 |
# 檢查是否是在啟動時自動調用(緩存範例)
|
875 |
-
is_startup =
|
876 |
|
877 |
# 如果是在 Spaces 環境中啟動時調用,則不處理 URL
|
878 |
-
if is_spaces
|
879 |
-
print("Skipping example URL processing
|
880 |
return gr.update(visible=False, value=None), gr.update(visible=False, value=None)
|
881 |
|
882 |
-
# 獲取 API 金鑰
|
883 |
-
api_key = youtube_api_key_input.value if hasattr(youtube_api_key_input, 'value') else None
|
884 |
# 處理 URL
|
885 |
-
return process_youtube_url(url
|
886 |
|
887 |
gr.Examples(
|
888 |
examples=[["https://www.youtube.com/watch?v=5D7l0tqQJ7k"]],
|
@@ -948,7 +925,7 @@ with gr.Blocks(css=compact_css, theme=gr.themes.Default(spacing_size=gr.themes.s
|
|
948 |
# 連接 YouTube 處理功能
|
949 |
youtube_input.change(
|
950 |
fn=process_youtube_url,
|
951 |
-
inputs=
|
952 |
outputs=[youtube_audio_player, youtube_download],
|
953 |
show_progress=True
|
954 |
)
|
@@ -1037,7 +1014,7 @@ with gr.Blocks(css=compact_css, theme=gr.themes.Default(spacing_size=gr.themes.s
|
|
1037 |
# Main submit action - Corrected outputs list
|
1038 |
submit_button.click(
|
1039 |
fn=transcribe_audio_with_error_handling,
|
1040 |
-
inputs=[mic_input, file_input, youtube_input, model_select, task_input, language_input, timestamp_input, phi4_prompt_input, device_input, output_text, active_tab
|
1041 |
outputs=[output_text, mic_input, file_input, youtube_input], # 保持原始輸出
|
1042 |
show_progress="full" # 顯示完整進度條
|
1043 |
)
|
|
|
14 |
import platform
|
15 |
import re
|
16 |
import subprocess
|
17 |
+
# 移除 YouTube API 相關導入,回到直接使用 yt-dlp
|
18 |
+
# import youtube_api
|
19 |
|
20 |
# --- 硬體檢查函數 ---
|
21 |
def get_hardware_info():
|
|
|
141 |
return None
|
142 |
|
143 |
# --- YouTube 音訊處理 ---
|
144 |
+
def process_youtube_url(youtube_url):
|
145 |
+
"""處理 YouTube URL,下載音訊並返回播放器和下載按鈕的更新"""
|
|
|
|
|
|
|
|
|
|
|
146 |
if not youtube_url or not youtube_url.strip():
|
147 |
return gr.update(visible=False, value=None), gr.update(visible=False, value=None)
|
148 |
|
|
|
150 |
import os
|
151 |
is_spaces = os.environ.get("SPACE_ID") is not None
|
152 |
|
|
|
|
|
|
|
153 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
# 處理 YouTube URL
|
155 |
print(f"Processing YouTube URL: {youtube_url}")
|
156 |
|
157 |
+
if is_spaces:
|
158 |
+
# 在 Spaces 環境中,顯示警告
|
159 |
+
print("Warning: YouTube download is not supported in Hugging Face Spaces.")
|
160 |
+
raise gr.Error("YouTube 下載在 Hugging Face Spaces 中不可用。由於安全限制,Spaces 環境無法通過 YouTube 的機器人驗證。請在本地環境中使用此功能。\n\nYouTube download is not available in Hugging Face Spaces. Due to security restrictions, Spaces environment cannot pass YouTube's bot verification. Please use this feature in a local environment.")
|
|
|
|
|
|
|
161 |
|
162 |
+
# 使用 yt-dlp 直接下載
|
163 |
+
audio_path, temp_dir, duration = download_youtube_audio(youtube_url)
|
164 |
|
165 |
if audio_path and os.path.exists(audio_path):
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
# 返回音訊播放器和下載按鈕的更新
|
167 |
return gr.update(visible=True, value=audio_path), gr.update(visible=True, value=audio_path)
|
168 |
else:
|
169 |
return gr.update(visible=False, value=None), gr.update(visible=False, value=None)
|
170 |
except Exception as e:
|
171 |
print(f"Error processing YouTube URL: {e}")
|
172 |
+
return gr.update(visible=False, value=None), gr.update(visible=False, value=None)
|
173 |
+
|
174 |
+
# --- YouTube Audio Download Function ---
|
175 |
+
def download_youtube_audio(url):
|
176 |
+
# 使用固定的目錄來存儲下載的音訊文件,這樣它們就不會被刪除
|
177 |
+
download_dir = os.path.join(tempfile.gettempdir(), "youtube_downloads")
|
178 |
+
os.makedirs(download_dir, exist_ok=True)
|
179 |
+
|
180 |
+
# 從 URL 中提取視頻 ID 作為文件名的一部分
|
181 |
+
video_id = url.split("v=")[-1].split("&")[0] if "v=" in url else str(int(time.time()))
|
182 |
+
filename = f"youtube_{video_id}_{int(time.time())}"
|
183 |
+
|
184 |
+
temp_dir = tempfile.mkdtemp()
|
185 |
+
downloaded_path = None
|
186 |
+
try:
|
187 |
+
temp_filepath_tmpl = os.path.join(download_dir, f"{filename}.%(ext)s")
|
188 |
+
ydl_opts = {
|
189 |
+
'format': 'bestaudio/best',
|
190 |
+
'outtmpl': temp_filepath_tmpl,
|
191 |
+
'noplaylist': True,
|
192 |
+
'quiet': True,
|
193 |
+
'postprocessors': [{'key': 'FFmpegExtractAudio','preferredcodec': 'mp3','preferredquality': '192',}],
|
194 |
+
'ffmpeg_location': shutil.which("ffmpeg"),
|
195 |
+
}
|
196 |
+
if not ydl_opts['ffmpeg_location']: print("Warning: ffmpeg not found... / 警告:找不到 ffmpeg...")
|
197 |
+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
198 |
+
info_dict = ydl.extract_info(url, download=True)
|
199 |
+
duration = info_dict.get('duration')
|
200 |
+
title = info_dict.get('title', 'unknown')
|
201 |
+
|
202 |
+
final_filepath = ydl.prepare_filename(info_dict)
|
203 |
+
if not final_filepath.endswith('.mp3'):
|
204 |
+
base_name = final_filepath.rsplit('.', 1)[0]
|
205 |
+
final_filepath = base_name + '.mp3'
|
206 |
+
|
207 |
+
if os.path.exists(final_filepath):
|
208 |
+
downloaded_path = final_filepath
|
209 |
+
print(f"YouTube audio downloaded: {downloaded_path}")
|
210 |
+
print(f"Title: {title}, Duration: {duration}s")
|
211 |
+
else:
|
212 |
+
potential_files = [os.path.join(download_dir, f) for f in os.listdir(download_dir) if f.startswith(filename) and f.endswith(".mp3")]
|
213 |
+
if potential_files:
|
214 |
+
downloaded_path = potential_files[0]
|
215 |
+
print(f"Warning: Could not find expected MP3, using fallback: {downloaded_path}")
|
216 |
+
duration = None
|
217 |
+
else:
|
218 |
+
raise FileNotFoundError(f"Audio file not found after download in {download_dir}")
|
219 |
+
|
220 |
+
return downloaded_path, temp_dir, duration
|
221 |
+
except Exception as e:
|
222 |
+
print(f"Error processing YouTube URL: {e}")
|
223 |
+
if temp_dir and os.path.exists(temp_dir):
|
224 |
+
try: shutil.rmtree(temp_dir)
|
225 |
+
except Exception as cleanup_e: print(f"Error cleaning temp directory {temp_dir}: {cleanup_e}")
|
226 |
+
return None, None, None
|
227 |
|
228 |
# --- Load ASR Pipeline ---
|
229 |
def load_asr_pipeline(model_id):
|
|
|
362 |
def transcribe_audio(mic_input, file_input, youtube_url, selected_model_identifier,
|
363 |
task, language, return_timestamps,
|
364 |
phi4_prompt_text, device_choice,
|
365 |
+
previous_output_text, active_tab):
|
366 |
global pipe, phi4_model, phi4_processor, current_model_name, current_device
|
367 |
audio_source = None
|
368 |
source_type_en = ""
|
|
|
469 |
status_update_str = f"Downloading YouTube Audio / 正在下載 YouTube 音訊..."
|
470 |
output_text_accumulated = status_update_prefix + status_update_str
|
471 |
|
472 |
+
# 檢查是否在 Hugging Face Spaces 環境中
|
473 |
+
is_spaces = os.environ.get("SPACE_ID") is not None
|
474 |
+
if is_spaces:
|
475 |
+
output_text_accumulated = status_update_prefix + "Error: YouTube download is not supported in Hugging Face Spaces. / 錯誤:YouTube 下載在 Hugging Face Spaces 中不可用。"
|
476 |
+
return (output_text_accumulated, gr.update(), gr.update(), gr.update())
|
477 |
|
478 |
+
# 使用 yt-dlp 直接下載
|
479 |
+
audio_path, temp_dir_to_clean, duration_yt = download_youtube_audio(youtube_url)
|
480 |
+
|
481 |
+
if audio_path and os.path.exists(audio_path):
|
482 |
+
audio_source = audio_path
|
483 |
+
audio_duration = duration_yt
|
484 |
+
else:
|
485 |
+
output_text_accumulated = status_update_prefix + "Error: Failed to download YouTube audio. / 錯誤:無法下載 YouTube 音訊。"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
486 |
return (output_text_accumulated, gr.update(), gr.update(), gr.update())
|
487 |
else:
|
488 |
# 如果沒有選擇任何輸入源或當前標籤沒有有效輸入
|
|
|
826 |
|
827 |
# 如果在 Spaces 環境中,顯示警告訊息
|
828 |
if is_spaces:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
829 |
gr.Markdown("""
|
830 |
+
⚠️ **注意:YouTube 下載在 Hugging Face Spaces 中不可用**
|
831 |
|
832 |
+
由於安全限制,Spaces 環境無法通過 YouTube 的機器人驗證。請在本地環境中使用此功能。
|
833 |
|
834 |
+
⚠️ **Note: YouTube download is not available in Hugging Face Spaces**
|
835 |
|
836 |
+
Due to security restrictions, Spaces environment cannot pass YouTube's bot verification. Please use this feature in a local environment.
|
837 |
+
""", elem_classes="warning-box")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
838 |
|
839 |
# YouTube URL 輸入框
|
840 |
youtube_input = gr.Textbox(label="YouTube URL / 網址", placeholder="Paste YouTube link here / 在此貼上 YouTube 連結")
|
|
|
851 |
is_spaces = os.environ.get("SPACE_ID") is not None
|
852 |
|
853 |
# 檢查是否是在啟動時自動調用(緩存範例)
|
854 |
+
is_startup = True # 簡化邏輯,假設啟動時調用
|
855 |
|
856 |
# 如果是在 Spaces 環境中啟動時調用,則不處理 URL
|
857 |
+
if is_spaces:
|
858 |
+
print("Skipping example URL processing in Spaces environment")
|
859 |
return gr.update(visible=False, value=None), gr.update(visible=False, value=None)
|
860 |
|
|
|
|
|
861 |
# 處理 URL
|
862 |
+
return process_youtube_url(url)
|
863 |
|
864 |
gr.Examples(
|
865 |
examples=[["https://www.youtube.com/watch?v=5D7l0tqQJ7k"]],
|
|
|
925 |
# 連接 YouTube 處理功能
|
926 |
youtube_input.change(
|
927 |
fn=process_youtube_url,
|
928 |
+
inputs=youtube_input,
|
929 |
outputs=[youtube_audio_player, youtube_download],
|
930 |
show_progress=True
|
931 |
)
|
|
|
1014 |
# Main submit action - Corrected outputs list
|
1015 |
submit_button.click(
|
1016 |
fn=transcribe_audio_with_error_handling,
|
1017 |
+
inputs=[mic_input, file_input, youtube_input, model_select, task_input, language_input, timestamp_input, phi4_prompt_input, device_input, output_text, active_tab],
|
1018 |
outputs=[output_text, mic_input, file_input, youtube_input], # 保持原始輸出
|
1019 |
show_progress="full" # 顯示完整進度條
|
1020 |
)
|
requirements.txt
CHANGED
@@ -14,9 +14,6 @@ safetensors>=0.3.0
|
|
14 |
yt-dlp>=2023.0.0
|
15 |
soundfile>=0.12.0
|
16 |
pydub>=0.25.0
|
17 |
-
google-api-python-client>=2.0.0
|
18 |
-
google-auth-httplib2>=0.1.0
|
19 |
-
google-auth-oauthlib>=0.5.0
|
20 |
|
21 |
# Data processing
|
22 |
numpy>=2.0.0
|
|
|
14 |
yt-dlp>=2023.0.0
|
15 |
soundfile>=0.12.0
|
16 |
pydub>=0.25.0
|
|
|
|
|
|
|
17 |
|
18 |
# Data processing
|
19 |
numpy>=2.0.0
|
requirements_local.txt
CHANGED
@@ -17,9 +17,10 @@ safetensors>=0.3.0
|
|
17 |
yt-dlp>=2023.0.0
|
18 |
soundfile>=0.12.0
|
19 |
pydub>=0.25.0
|
20 |
-
|
21 |
-
google-
|
22 |
-
google-auth-
|
|
|
23 |
|
24 |
# Data processing
|
25 |
numpy>=2.0.0
|
|
|
17 |
yt-dlp>=2023.0.0
|
18 |
soundfile>=0.12.0
|
19 |
pydub>=0.25.0
|
20 |
+
# 移除 Google API 相關依賴,因為 YouTube API 不提供下載功能
|
21 |
+
# google-api-python-client>=2.0.0
|
22 |
+
# google-auth-httplib2>=0.1.0
|
23 |
+
# google-auth-oauthlib>=0.5.0
|
24 |
|
25 |
# Data processing
|
26 |
numpy>=2.0.0
|
youtube_api.py
DELETED
@@ -1,265 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
YouTube API 處理模塊
|
3 |
-
使用 YouTube Data API 獲取視頻信息,並使用 yt-dlp 下載音頻
|
4 |
-
"""
|
5 |
-
|
6 |
-
import os
|
7 |
-
import time
|
8 |
-
import tempfile
|
9 |
-
import shutil
|
10 |
-
import yt_dlp
|
11 |
-
from googleapiclient.discovery import build
|
12 |
-
from googleapiclient.errors import HttpError
|
13 |
-
|
14 |
-
# YouTube API 配置
|
15 |
-
YOUTUBE_API_SERVICE_NAME = "youtube"
|
16 |
-
YOUTUBE_API_VERSION = "v3"
|
17 |
-
YOUTUBE_API_KEY = None # 將在運行時設置
|
18 |
-
|
19 |
-
def set_api_key(api_key):
|
20 |
-
"""設置 YouTube API 金鑰"""
|
21 |
-
global YOUTUBE_API_KEY
|
22 |
-
YOUTUBE_API_KEY = api_key
|
23 |
-
return YOUTUBE_API_KEY is not None
|
24 |
-
|
25 |
-
def extract_video_id(youtube_url):
|
26 |
-
"""從 YouTube URL 中提取視頻 ID"""
|
27 |
-
if "youtube.com/watch" in youtube_url:
|
28 |
-
# 標準 YouTube URL
|
29 |
-
video_id = youtube_url.split("v=")[1].split("&")[0]
|
30 |
-
elif "youtu.be/" in youtube_url:
|
31 |
-
# 短 URL
|
32 |
-
video_id = youtube_url.split("youtu.be/")[1].split("?")[0]
|
33 |
-
else:
|
34 |
-
# 不支持的 URL 格式
|
35 |
-
return None
|
36 |
-
return video_id
|
37 |
-
|
38 |
-
def get_video_info(video_id):
|
39 |
-
"""使用 YouTube Data API 獲取視頻信息"""
|
40 |
-
if not YOUTUBE_API_KEY:
|
41 |
-
raise ValueError("YouTube API 金鑰未設置。請先調用 set_api_key() 函數。")
|
42 |
-
|
43 |
-
try:
|
44 |
-
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=YOUTUBE_API_KEY)
|
45 |
-
|
46 |
-
# 獲取視頻詳細信息
|
47 |
-
video_response = youtube.videos().list(
|
48 |
-
part="snippet,contentDetails,statistics",
|
49 |
-
id=video_id
|
50 |
-
).execute()
|
51 |
-
|
52 |
-
# 檢查是否找到視頻
|
53 |
-
if not video_response.get("items"):
|
54 |
-
return None
|
55 |
-
|
56 |
-
video_info = video_response["items"][0]
|
57 |
-
snippet = video_info["snippet"]
|
58 |
-
content_details = video_info["contentDetails"]
|
59 |
-
|
60 |
-
# 解析時長
|
61 |
-
duration_str = content_details["duration"] # 格式: PT#H#M#S
|
62 |
-
duration_seconds = parse_duration(duration_str)
|
63 |
-
|
64 |
-
# 返回視頻信息
|
65 |
-
return {
|
66 |
-
"title": snippet["title"],
|
67 |
-
"description": snippet["description"],
|
68 |
-
"channel": snippet["channelTitle"],
|
69 |
-
"published_at": snippet["publishedAt"],
|
70 |
-
"duration": duration_seconds,
|
71 |
-
"thumbnail": snippet["thumbnails"]["high"]["url"] if "high" in snippet["thumbnails"] else snippet["thumbnails"]["default"]["url"]
|
72 |
-
}
|
73 |
-
|
74 |
-
except HttpError as e:
|
75 |
-
print(f"YouTube API 錯誤: {e}")
|
76 |
-
return None
|
77 |
-
except Exception as e:
|
78 |
-
print(f"獲取視頻信息時發生錯誤: {e}")
|
79 |
-
return None
|
80 |
-
|
81 |
-
def parse_duration(duration_str):
|
82 |
-
"""解析 ISO 8601 時長格式 (PT#H#M#S)"""
|
83 |
-
duration_str = duration_str[2:] # 移除 "PT"
|
84 |
-
hours, minutes, seconds = 0, 0, 0
|
85 |
-
|
86 |
-
# 解析小時
|
87 |
-
if "H" in duration_str:
|
88 |
-
hours_part = duration_str.split("H")[0]
|
89 |
-
hours = int(hours_part)
|
90 |
-
duration_str = duration_str.split("H")[1]
|
91 |
-
|
92 |
-
# 解析分鐘
|
93 |
-
if "M" in duration_str:
|
94 |
-
minutes_part = duration_str.split("M")[0]
|
95 |
-
minutes = int(minutes_part)
|
96 |
-
duration_str = duration_str.split("M")[1]
|
97 |
-
|
98 |
-
# 解析秒
|
99 |
-
if "S" in duration_str:
|
100 |
-
seconds_part = duration_str.split("S")[0]
|
101 |
-
seconds = int(seconds_part)
|
102 |
-
|
103 |
-
# 計算總秒數
|
104 |
-
total_seconds = hours * 3600 + minutes * 60 + seconds
|
105 |
-
return total_seconds
|
106 |
-
|
107 |
-
def download_audio(video_id, api_info=None):
|
108 |
-
"""下載 YouTube 視頻的音頻
|
109 |
-
|
110 |
-
Args:
|
111 |
-
video_id: YouTube 視頻 ID
|
112 |
-
api_info: 從 API 獲取的視頻信息 (可選)
|
113 |
-
|
114 |
-
Returns:
|
115 |
-
tuple: (音頻文件路徑, 臨時目錄, 視頻時長)
|
116 |
-
"""
|
117 |
-
# 使用固定的目錄來存儲下載的音訊文件
|
118 |
-
download_dir = os.path.join(tempfile.gettempdir(), "youtube_downloads")
|
119 |
-
os.makedirs(download_dir, exist_ok=True)
|
120 |
-
|
121 |
-
# 使用視頻 ID 和時間戳作為文件名
|
122 |
-
filename = f"youtube_{video_id}_{int(time.time())}"
|
123 |
-
temp_dir = tempfile.mkdtemp()
|
124 |
-
|
125 |
-
try:
|
126 |
-
# 準備下載路徑
|
127 |
-
temp_filepath_tmpl = os.path.join(download_dir, f"{filename}.%(ext)s")
|
128 |
-
|
129 |
-
# 設置 yt-dlp 選項
|
130 |
-
ydl_opts = {
|
131 |
-
'format': 'bestaudio/best',
|
132 |
-
'outtmpl': temp_filepath_tmpl,
|
133 |
-
'noplaylist': True,
|
134 |
-
'quiet': True,
|
135 |
-
'postprocessors': [{
|
136 |
-
'key': 'FFmpegExtractAudio',
|
137 |
-
'preferredcodec': 'mp3',
|
138 |
-
'preferredquality': '192',
|
139 |
-
}],
|
140 |
-
'ffmpeg_location': shutil.which("ffmpeg"),
|
141 |
-
}
|
142 |
-
|
143 |
-
# 檢查 ffmpeg
|
144 |
-
if not ydl_opts['ffmpeg_location']:
|
145 |
-
print("Warning: ffmpeg not found... / 警告:找不到 ffmpeg...")
|
146 |
-
|
147 |
-
# 如果已經有 API 信息,使用它
|
148 |
-
duration = api_info["duration"] if api_info else None
|
149 |
-
title = api_info["title"] if api_info else "Unknown"
|
150 |
-
|
151 |
-
# 下載音頻
|
152 |
-
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
153 |
-
# 如果沒有 API 信息,從 yt-dlp 獲取
|
154 |
-
if not api_info:
|
155 |
-
info_dict = ydl.extract_info(f"https://www.youtube.com/watch?v={video_id}", download=True)
|
156 |
-
duration = info_dict.get('duration')
|
157 |
-
title = info_dict.get('title', 'unknown')
|
158 |
-
else:
|
159 |
-
# 有 API 信息,直接下載
|
160 |
-
ydl.download([f"https://www.youtube.com/watch?v={video_id}"])
|
161 |
-
|
162 |
-
# 確定最終文件路徑
|
163 |
-
final_filepath = os.path.join(download_dir, f"{filename}.mp3")
|
164 |
-
|
165 |
-
# 檢查文件是否存在
|
166 |
-
if os.path.exists(final_filepath):
|
167 |
-
print(f"YouTube audio downloaded: {final_filepath}")
|
168 |
-
print(f"Title: {title}, Duration: {duration}s")
|
169 |
-
return final_filepath, temp_dir, duration
|
170 |
-
else:
|
171 |
-
# 嘗試查找可能的文件
|
172 |
-
potential_files = [
|
173 |
-
os.path.join(download_dir, f)
|
174 |
-
for f in os.listdir(download_dir)
|
175 |
-
if f.startswith(filename) and f.endswith(".mp3")
|
176 |
-
]
|
177 |
-
if potential_files:
|
178 |
-
downloaded_path = potential_files[0]
|
179 |
-
print(f"Warning: Could not find expected MP3, using fallback: {downloaded_path}")
|
180 |
-
return downloaded_path, temp_dir, duration
|
181 |
-
else:
|
182 |
-
raise FileNotFoundError(f"Audio file not found after download in {download_dir}")
|
183 |
-
|
184 |
-
except Exception as e:
|
185 |
-
print(f"Error downloading YouTube audio: {e}")
|
186 |
-
if temp_dir and os.path.exists(temp_dir):
|
187 |
-
try:
|
188 |
-
shutil.rmtree(temp_dir)
|
189 |
-
except Exception as cleanup_e:
|
190 |
-
print(f"Error cleaning temp directory {temp_dir}: {cleanup_e}")
|
191 |
-
return None, None, None
|
192 |
-
|
193 |
-
def process_youtube_url(youtube_url, user_api_key=None):
|
194 |
-
"""處理 YouTube URL,獲取信息並下載音頻
|
195 |
-
|
196 |
-
Args:
|
197 |
-
youtube_url: YouTube 視頻 URL
|
198 |
-
user_api_key: 用戶提供的 API 金鑰(可選)
|
199 |
-
|
200 |
-
Returns:
|
201 |
-
tuple: (音頻文件路徑, 視頻信息)
|
202 |
-
"""
|
203 |
-
# 檢查 URL 是否有效
|
204 |
-
if not youtube_url or not youtube_url.strip():
|
205 |
-
return None, None
|
206 |
-
|
207 |
-
# 檢查是否在 Hugging Face Spaces 環境中
|
208 |
-
is_spaces = os.environ.get("SPACE_ID") is not None
|
209 |
-
|
210 |
-
# 如果提供了用戶 API 金鑰,設置它
|
211 |
-
if user_api_key:
|
212 |
-
set_api_key(user_api_key)
|
213 |
-
|
214 |
-
# 提取視頻 ID
|
215 |
-
video_id = extract_video_id(youtube_url)
|
216 |
-
if not video_id:
|
217 |
-
print(f"Invalid YouTube URL: {youtube_url}")
|
218 |
-
return None, None
|
219 |
-
|
220 |
-
# 檢查是否設置了 API 金鑰
|
221 |
-
if YOUTUBE_API_KEY:
|
222 |
-
# 使用 API 獲取視頻信息
|
223 |
-
video_info = get_video_info(video_id)
|
224 |
-
if not video_info:
|
225 |
-
print(f"Could not get video info from API for: {video_id}")
|
226 |
-
|
227 |
-
# 如果在 Spaces 環境中且沒有 API 信息,則不嘗試下載
|
228 |
-
if is_spaces:
|
229 |
-
raise ValueError("YouTube 下載在 Hugging Face Spaces 中需要有效的 API 金鑰。")
|
230 |
-
|
231 |
-
# 如果 API 失敗,嘗試直接下載
|
232 |
-
audio_path, temp_dir, duration = download_audio(video_id)
|
233 |
-
return audio_path, {"title": "Unknown", "duration": duration}
|
234 |
-
|
235 |
-
# 使用 API 信息下載音頻
|
236 |
-
audio_path, temp_dir, _ = download_audio(video_id, video_info)
|
237 |
-
return audio_path, video_info
|
238 |
-
else:
|
239 |
-
# 沒有 API 金鑰
|
240 |
-
if is_spaces:
|
241 |
-
# 在 Spaces 環境中需要 API 金鑰
|
242 |
-
raise ValueError("YouTube 下載在 Hugging Face Spaces 中需要 API 金鑰。請在上方的 'YouTube API Key Settings' 中輸入您的 API 金鑰。\n\nYouTube download in Hugging Face Spaces requires an API key. Please enter your API key in the 'YouTube API Key Settings' section above.")
|
243 |
-
|
244 |
-
# 本地環境,直接使用 yt-dlp
|
245 |
-
print("No YouTube API key set, using yt-dlp directly")
|
246 |
-
audio_path, temp_dir, duration = download_audio(video_id)
|
247 |
-
return audio_path, {"title": "Unknown", "duration": duration}
|
248 |
-
|
249 |
-
# 測試代碼
|
250 |
-
if __name__ == "__main__":
|
251 |
-
# 設置 API 金鑰(實際使用時應從環境變量或配置文件獲取)
|
252 |
-
api_key = "YOUR_API_KEY"
|
253 |
-
set_api_key(api_key)
|
254 |
-
|
255 |
-
# 測試 URL
|
256 |
-
test_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
|
257 |
-
|
258 |
-
# 處理 URL
|
259 |
-
audio_path, video_info = process_youtube_url(test_url)
|
260 |
-
|
261 |
-
if audio_path and video_info:
|
262 |
-
print(f"Downloaded: {audio_path}")
|
263 |
-
print(f"Video info: {video_info}")
|
264 |
-
else:
|
265 |
-
print("Failed to process YouTube URL")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|