hchcsuim's picture
Fix YouTube example URL processing in Spaces environment
d2b4586
raw
history blame
9.46 kB
"""
YouTube API 處理模塊
使用 YouTube Data API 獲取視頻信息,並使用 yt-dlp 下載音頻
"""
import os
import re
import shutil
import tempfile
import time
from urllib.parse import parse_qs, urlparse

import yt_dlp
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# YouTube API configuration: service name and version passed to
# googleapiclient's build(), plus the developer key used for requests.
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
YOUTUBE_API_KEY = None # Set at runtime via set_api_key()
def set_api_key(api_key):
    """Store the YouTube Data API key used by this module.

    Surrounding whitespace is stripped, and an empty or whitespace-only
    string is stored as ``None``. This keeps the return value consistent
    with the truthiness checks the rest of the module performs on
    ``YOUTUBE_API_KEY`` (a blank key is "not set").

    Args:
        api_key: The API key string, or ``None`` to clear the stored key.

    Returns:
        bool: ``True`` if a usable key is now stored, ``False`` otherwise.
    """
    global YOUTUBE_API_KEY
    if isinstance(api_key, str):
        api_key = api_key.strip()
    # Collapse "" (and any other falsy value) to None so "is the key set?"
    # has a single answer regardless of how callers test it.
    YOUTUBE_API_KEY = api_key or None
    return YOUTUBE_API_KEY is not None
def extract_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Recognised forms (the scheme is optional and any ``youtube.com``
    subdomain such as ``www.``, ``m.`` or ``music.`` is accepted):

      * ``youtube.com/watch?v=<id>``
      * ``youtu.be/<id>``
      * ``youtube.com/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``,
        ``/v/<id>``

    Query parameters other than ``v``, URL fragments, surrounding
    whitespace and trailing slashes are ignored.

    Args:
        youtube_url: URL string to inspect.

    Returns:
        The video ID string, or ``None`` if the input is not a string, is
        not a recognised YouTube URL, or carries no video ID.
    """
    if not isinstance(youtube_url, str):
        return None
    url = youtube_url.strip()
    if not url:
        return None

    # urlparse only fills in the host when a scheme (or a leading "//") is
    # present, so add one for bare "youtube.com/..." style input. Only the
    # part before the query is checked so a "://" inside a parameter value
    # is not mistaken for a scheme.
    if url.startswith("//"):
        url = "https:" + url
    elif "://" not in url.split("?", 1)[0]:
        url = "https://" + url

    try:
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # Raised for syntactically invalid hosts (e.g. unbalanced "[").
        return None

    segments = [part for part in parsed.path.split("/") if part]
    path_id_prefixes = ("shorts", "embed", "live", "v")

    video_id = None
    if host == "youtu.be":
        if segments:
            video_id = segments[0]
    elif host == "youtube.com" or host.endswith(".youtube.com"):
        if segments and segments[0] == "watch":
            # parse_qs drops blank values, so "watch?v=" yields no ID.
            video_id = parse_qs(parsed.query).get("v", [None])[0]
        elif len(segments) >= 2 and segments[0] in path_id_prefixes:
            video_id = segments[1]

    return video_id or None
def get_video_info(video_id):
    """Look up a video's metadata through the YouTube Data API.

    Args:
        video_id: YouTube video ID to query.

    Returns:
        dict | None: A dict with the keys ``title``, ``description``,
        ``channel``, ``published_at``, ``duration`` (seconds, as computed by
        ``parse_duration``) and ``thumbnail`` (the "high" thumbnail URL when
        present, otherwise the "default" one). ``None`` is returned when the
        API response has no items or when any error occurs during the lookup.

    Raises:
        ValueError: If no API key has been configured.
    """
    if not YOUTUBE_API_KEY:
        raise ValueError("YouTube API 金鑰未設置。請先調用 set_api_key() 函數。")

    try:
        client = build(
            YOUTUBE_API_SERVICE_NAME,
            YOUTUBE_API_VERSION,
            developerKey=YOUTUBE_API_KEY,
        )

        # Request the detail parts for this single video.
        request = client.videos().list(
            part="snippet,contentDetails,statistics",
            id=video_id,
        )
        response = request.execute()

        # An empty or missing "items" list means the video was not found.
        if not response.get("items"):
            return None

        entry = response["items"][0]
        snippet = entry["snippet"]
        details = entry["contentDetails"]

        # contentDetails.duration is an ISO 8601 string (PT#H#M#S).
        seconds = parse_duration(details["duration"])

        info = {
            "title": snippet["title"],
            "description": snippet["description"],
            "channel": snippet["channelTitle"],
            "published_at": snippet["publishedAt"],
            "duration": seconds,
        }

        # Prefer the "high" thumbnail and fall back to "default".
        thumbnails = snippet["thumbnails"]
        size = "high" if "high" in thumbnails else "default"
        info["thumbnail"] = thumbnails[size]["url"]
        return info

    except HttpError as err:
        print(f"YouTube API 錯誤: {err}")
        return None
    except Exception as err:
        print(f"獲取視頻信息時發生錯誤: {err}")
        return None
# ISO 8601 duration with optional week/day date part and optional
# hour/minute/second time part, e.g. "PT4M13S", "P1DT2H", "P0D".
_ISO8601_DURATION_RE = re.compile(
    r"P(?:(?P<weeks>\d+)W)?(?:(?P<days>\d+)D)?"
    r"(?:T(?:(?P<hours>\d+)H)?(?:(?P<minutes>\d+)M)?(?:(?P<seconds>\d+)S)?)?"
)


def parse_duration(duration_str):
    """Convert an ISO 8601 duration string to a total number of seconds.

    Handles the time-only form (``PT#H#M#S``) as well as durations that
    carry a date part (``P#DT#H#M#S``, ``P#W``), which appear for videos of
    24 hours or longer. Every component is optional, so ``"PT"`` and
    ``"P0D"`` evaluate to 0.

    Args:
        duration_str: Duration string such as ``"PT1H2M3S"``.

    Returns:
        int: Total duration in seconds.

    Raises:
        ValueError: If the string is not a supported ISO 8601 duration.
    """
    match = _ISO8601_DURATION_RE.fullmatch(duration_str)
    if match is None:
        raise ValueError(f"Unsupported ISO 8601 duration: {duration_str!r}")

    # Components absent from the string come back as None; count them as 0.
    parts = {name: int(value or 0) for name, value in match.groupdict().items()}
    return (
        parts["weeks"] * 7 * 86400
        + parts["days"] * 86400
        + parts["hours"] * 3600
        + parts["minutes"] * 60
        + parts["seconds"]
    )
def download_audio(video_id, api_info=None):
    """Download the audio track of a YouTube video as an MP3 file.

    yt-dlp writes the file into a ``youtube_downloads`` folder under the
    system temp directory, named from the video ID and the current Unix
    timestamp. A separate directory is also created with
    ``tempfile.mkdtemp()`` and handed back to the caller; this function only
    removes it when the download fails.

    Args:
        video_id: YouTube video ID.
        api_info: Optional video info dict. When truthy, its ``"duration"``
            and ``"title"`` entries are used instead of the metadata
            reported by yt-dlp.

    Returns:
        tuple: ``(audio_path, temp_dir, duration)`` on success, or
        ``(None, None, None)`` if anything fails during the download.
    """
    # All downloads share one fixed folder under the system temp directory.
    target_dir = os.path.join(tempfile.gettempdir(), "youtube_downloads")
    os.makedirs(target_dir, exist_ok=True)

    # File stem: video ID plus a timestamp.
    stem = f"youtube_{video_id}_{int(time.time())}"
    temp_dir = tempfile.mkdtemp()

    try:
        # yt-dlp fills in the extension through the %(ext)s placeholder.
        output_template = os.path.join(target_dir, f"{stem}.%(ext)s")

        mp3_extractor = {
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }
        options = {
            'format': 'bestaudio/best',
            'outtmpl': output_template,
            'noplaylist': True,
            'quiet': True,
            'postprocessors': [mp3_extractor],
            'ffmpeg_location': shutil.which("ffmpeg"),
        }

        if not options['ffmpeg_location']:
            print("Warning: ffmpeg not found... / 警告:找不到 ffmpeg...")

        # Start from the API-provided metadata when there is any.
        if api_info:
            duration = api_info["duration"]
            title = api_info["title"]
        else:
            duration = None
            title = "Unknown"

        watch_url = f"https://www.youtube.com/watch?v={video_id}"
        with yt_dlp.YoutubeDL(options) as downloader:
            if api_info:
                # Metadata already known: just download.
                downloader.download([watch_url])
            else:
                # No API info: download and take metadata from yt-dlp.
                metadata = downloader.extract_info(watch_url, download=True)
                duration = metadata.get('duration')
                title = metadata.get('title', 'unknown')

        expected_path = os.path.join(target_dir, f"{stem}.mp3")
        if not os.path.exists(expected_path):
            # Look for any MP3 in the folder that starts with our stem.
            candidates = [
                os.path.join(target_dir, entry)
                for entry in os.listdir(target_dir)
                if entry.startswith(stem) and entry.endswith(".mp3")
            ]
            if not candidates:
                raise FileNotFoundError(f"Audio file not found after download in {target_dir}")
            fallback_path = candidates[0]
            print(f"Warning: Could not find expected MP3, using fallback: {fallback_path}")
            return fallback_path, temp_dir, duration

        print(f"YouTube audio downloaded: {expected_path}")
        print(f"Title: {title}, Duration: {duration}s")
        return expected_path, temp_dir, duration

    except Exception as err:
        print(f"Error downloading YouTube audio: {err}")
        # Best-effort removal of the mkdtemp() directory on failure.
        if temp_dir and os.path.exists(temp_dir):
            try:
                shutil.rmtree(temp_dir)
            except Exception as cleanup_err:
                print(f"Error cleaning temp directory {temp_dir}: {cleanup_err}")
        return None, None, None
def process_youtube_url(youtube_url, user_api_key=None):
    """Resolve a YouTube URL to a downloaded audio file plus video info.

    When an API key is configured, metadata is fetched through the YouTube
    Data API and passed on to the downloader. Without a key, or when the API
    lookup fails, the audio is downloaded directly with yt-dlp. That fallback
    is refused when the ``SPACE_ID`` environment variable is set (Hugging
    Face Spaces).

    Args:
        youtube_url: YouTube video URL.
        user_api_key: Optional user-supplied API key; when truthy it is
            installed with ``set_api_key`` before processing.

    Returns:
        tuple: ``(audio_path, video_info)``. Both are ``None`` when the URL
        is blank or not a recognised YouTube URL. ``audio_path`` is ``None``
        when the download fails. Without API metadata ``video_info`` is
        ``{"title": "Unknown", "duration": <seconds or None>}``.

    Raises:
        ValueError: In a Spaces environment when no API key is set or the
            API lookup returned nothing.
    """
    if not youtube_url or not youtube_url.strip():
        return None, None
    # Use the trimmed URL from here on so stray whitespace from a text box
    # cannot end up inside the extracted video ID.
    url = youtube_url.strip()

    # Hugging Face Spaces sets SPACE_ID in the environment.
    is_spaces = os.environ.get("SPACE_ID") is not None

    if user_api_key:
        set_api_key(user_api_key)

    video_id = extract_video_id(url)
    if not video_id:
        print(f"Invalid YouTube URL: {youtube_url}")
        return None, None

    video_info = None
    if YOUTUBE_API_KEY:
        video_info = get_video_info(video_id)
        if not video_info:
            print(f"Could not get video info from API for: {video_id}")
            # No API info in Spaces: do not attempt a direct download.
            if is_spaces:
                raise ValueError("YouTube 下載在 Hugging Face Spaces 中需要有效的 API 金鑰。")
            video_info = None
    else:
        if is_spaces:
            raise ValueError("YouTube 下載在 Hugging Face Spaces 中需要 API 金鑰。請在上方的 'YouTube API Key Settings' 中輸入您的 API 金鑰。\n\nYouTube download in Hugging Face Spaces requires an API key. Please enter your API key in the 'YouTube API Key Settings' section above.")
        print("No YouTube API key set, using yt-dlp directly")

    if video_info:
        audio_path, temp_dir, duration = download_audio(video_id, video_info)
    else:
        audio_path, temp_dir, duration = download_audio(video_id)

    # download_audio hands back a scratch directory this function has no use
    # for; dropping it here stops one directory leaking per call. os.rmdir
    # only removes an EMPTY directory, so nothing can be lost if the
    # directory ever does hold files.
    if temp_dir:
        try:
            os.rmdir(temp_dir)
        except OSError:
            pass  # already gone, or not empty: leave it alone

    if video_info:
        return audio_path, video_info
    return audio_path, {"title": "Unknown", "duration": duration}
# Manual smoke test: run this module directly to download one sample video.
if __name__ == "__main__":
    # Take the key from the environment instead of installing a hard-coded
    # placeholder, which would only trigger a failing API request. With no
    # key set, processing falls back to yt-dlp directly.
    api_key = os.environ.get("YOUTUBE_API_KEY")
    if api_key:
        set_api_key(api_key)

    # Sample URL
    test_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"

    # Process the URL and report the outcome
    audio_path, video_info = process_youtube_url(test_url)
    if audio_path and video_info:
        print(f"Downloaded: {audio_path}")
        print(f"Video info: {video_info}")
    else:
        print("Failed to process YouTube URL")