"""YouTube API helper module.

Uses the YouTube Data API to fetch video metadata and yt-dlp to download
the audio track of a video.
"""
import os
import re
import shutil
import tempfile
import time
from urllib.parse import parse_qs

import yt_dlp
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# YouTube API configuration
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
YOUTUBE_API_KEY = None  # set at runtime through set_api_key()

# Characters allowed in a video ID (URL-safe base64 alphabet).  Validating
# the ID keeps URL-derived text out of the yt-dlp output template and the
# file name built from it.
_VIDEO_ID_RE = re.compile(r"[A-Za-z0-9_-]+")

# Path-style URLs that carry the video ID as a path segment.
_PATH_VIDEO_ID_RE = re.compile(
    r"youtube\.com/(?:shorts|embed|live)/([^/?#&]+)"
)

# ISO 8601 duration restricted to the designators with a fixed length in
# seconds: weeks, days, hours, minutes, seconds.
_DURATION_RE = re.compile(
    r"P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?"
)
# Seconds per unit, in the same order as the groups of _DURATION_RE.
_DURATION_UNIT_SECONDS = (604800, 86400, 3600, 60, 1)


def set_api_key(api_key):
    """Store the YouTube Data API key used by this module.

    Args:
        api_key: The API key, or None to clear it.

    Returns:
        bool: True when the stored key is not None.
    """
    global YOUTUBE_API_KEY
    YOUTUBE_API_KEY = api_key
    return YOUTUBE_API_KEY is not None


def extract_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Supported forms are ``youtube.com/watch?v=ID``, ``youtu.be/ID`` and
    ``youtube.com/{shorts,embed,live}/ID``; the scheme is optional.

    Args:
        youtube_url: The URL to inspect.

    Returns:
        The video ID, or None when the URL is empty, is not a supported
        form, has no ID, or the ID contains unexpected characters.
    """
    if not youtube_url:
        return None

    url = youtube_url.strip()
    candidate = None

    if "youtube.com/watch" in url:
        # Standard URL: read the "v" query parameter, ignoring any fragment.
        query = url.partition("?")[2].partition("#")[0]
        values = parse_qs(query).get("v")
        candidate = values[0] if values else None
    elif "youtu.be/" in url:
        # Short URL: the ID is the first path segment after the host.
        tail = url.split("youtu.be/", 1)[1]
        candidate = re.split(r"[/?#&]", tail, maxsplit=1)[0]
    else:
        match = _PATH_VIDEO_ID_RE.search(url)
        candidate = match.group(1) if match else None

    if candidate and _VIDEO_ID_RE.fullmatch(candidate):
        return candidate
    return None


def get_video_info(video_id):
    """Fetch metadata for a video through the YouTube Data API.

    Args:
        video_id: The YouTube video ID.

    Returns:
        dict with the keys ``title``, ``description``, ``channel``,
        ``published_at``, ``duration`` (seconds) and ``thumbnail`` (URL or
        None), or None when the video is not found or the request fails.

    Raises:
        ValueError: If no API key has been set via set_api_key().
    """
    if not YOUTUBE_API_KEY:
        raise ValueError("YouTube API 金鑰未設置。請先調用 set_api_key() 函數。")

    try:
        youtube = build(
            YOUTUBE_API_SERVICE_NAME,
            YOUTUBE_API_VERSION,
            developerKey=YOUTUBE_API_KEY,
        )

        # Request the video details.
        video_response = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=video_id,
        ).execute()

        # An empty "items" list means the video was not found.
        items = video_response.get("items")
        if not items:
            return None

        video_info = items[0]
        snippet = video_info["snippet"]
        content_details = video_info["contentDetails"]

        # Duration arrives as an ISO 8601 string such as PT#H#M#S.
        duration_seconds = parse_duration(content_details["duration"])

        # Prefer the "high" thumbnail, fall back to "default"; a missing
        # thumbnail must not discard the rest of the metadata.
        thumbnails = snippet.get("thumbnails") or {}
        thumbnail = thumbnails.get("high") or thumbnails.get("default") or {}

        return {
            "title": snippet["title"],
            "description": snippet["description"],
            "channel": snippet["channelTitle"],
            "published_at": snippet["publishedAt"],
            "duration": duration_seconds,
            "thumbnail": thumbnail.get("url"),
        }
    except HttpError as e:
        print(f"YouTube API 錯誤: {e}")
        return None
    except Exception as e:
        print(f"獲取視頻信息時發生錯誤: {e}")
        return None


def parse_duration(duration_str):
    """Convert an ISO 8601 duration (e.g. ``PT1H2M3S``) to whole seconds.

    Week and day components (``P1W``, ``P1DT2H``) are supported in addition
    to hours, minutes and seconds.

    Args:
        duration_str: The duration string; empty or None yields 0.

    Returns:
        int: Total number of seconds.

    Raises:
        ValueError: If the string is not a supported ISO 8601 duration.
    """
    if not duration_str:
        return 0

    match = _DURATION_RE.fullmatch(duration_str.strip())
    if match is None:
        raise ValueError(f"Unsupported ISO 8601 duration: {duration_str!r}")

    return sum(
        int(value) * unit_seconds
        for value, unit_seconds in zip(match.groups(), _DURATION_UNIT_SECONDS)
        if value
    )


def _remove_temp_dir(temp_dir):
    """Best-effort removal of a temporary directory; failures are reported."""
    if temp_dir and os.path.exists(temp_dir):
        try:
            shutil.rmtree(temp_dir)
        except OSError as cleanup_e:
            print(f"Error cleaning temp directory {temp_dir}: {cleanup_e}")


def download_audio(video_id, api_info=None):
    """Download the audio track of a YouTube video as MP3.

    The file is written to a ``youtube_downloads`` directory under the
    system temp directory.  A separate, empty temporary directory is also
    created and returned; the caller owns it and should remove it.

    Args:
        video_id: YouTube video ID.
        api_info: Video metadata obtained from the API (optional).  When
            given, its ``duration`` and ``title`` entries are used instead
            of asking yt-dlp for them.

    Returns:
        tuple: (audio file path, temporary directory, video duration), or
        (None, None, None) when the download fails.
    """
    # Downloads go to a fixed directory under the system temp dir.
    download_dir = os.path.join(tempfile.gettempdir(), "youtube_downloads")
    os.makedirs(download_dir, exist_ok=True)

    # The file name combines the video ID and the current timestamp.
    filename = f"youtube_{video_id}_{int(time.time())}"
    temp_dir = tempfile.mkdtemp()

    try:
        # yt-dlp fills in the extension through the %(ext)s placeholder.
        temp_filepath_tmpl = os.path.join(download_dir, f"{filename}.%(ext)s")

        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': temp_filepath_tmpl,
            'noplaylist': True,
            'quiet': True,
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
            'ffmpeg_location': shutil.which("ffmpeg"),
        }

        # The MP3 post-processor needs ffmpeg; warn early when it is missing.
        if not ydl_opts['ffmpeg_location']:
            print("Warning: ffmpeg not found... / 警告:找不到 ffmpeg...")

        # Use the API metadata when available; tolerate partial dicts.
        duration = api_info.get("duration") if api_info else None
        title = api_info.get("title", "Unknown") if api_info else "Unknown"

        video_url = f"https://www.youtube.com/watch?v={video_id}"
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            if not api_info:
                # No API metadata: let yt-dlp report it while downloading.
                info_dict = ydl.extract_info(video_url, download=True)
                duration = info_dict.get('duration')
                title = info_dict.get('title', 'unknown')
            else:
                # Metadata already known: just download.
                ydl.download([video_url])

        # Expected output path after MP3 extraction.
        final_filepath = os.path.join(download_dir, f"{filename}.mp3")
        if os.path.exists(final_filepath):
            print(f"YouTube audio downloaded: {final_filepath}")
            print(f"Title: {title}, Duration: {duration}s")
            return final_filepath, temp_dir, duration

        # Fall back to any MP3 in the download dir sharing our prefix.
        potential_files = [
            os.path.join(download_dir, name)
            for name in os.listdir(download_dir)
            if name.startswith(filename) and name.endswith(".mp3")
        ]
        if not potential_files:
            raise FileNotFoundError(
                f"Audio file not found after download in {download_dir}"
            )

        downloaded_path = potential_files[0]
        print(f"Warning: Could not find expected MP3, using fallback: {downloaded_path}")
        return downloaded_path, temp_dir, duration

    except Exception as e:
        # Boundary handler: report the failure and signal it with Nones.
        print(f"Error downloading YouTube audio: {e}")
        _remove_temp_dir(temp_dir)
        return None, None, None


def process_youtube_url(youtube_url):
    """Resolve a YouTube URL, fetch its metadata and download its audio.

    When an API key is set, metadata comes from the YouTube Data API; if
    that fails, or no key is set, the audio is downloaded with yt-dlp alone
    and a minimal metadata dict is returned.

    Args:
        youtube_url: YouTube video URL.

    Returns:
        tuple: (audio file path, video info dict).  Both are None for an
        empty or unsupported URL; the path is None when the download fails.
    """
    # Reject empty or whitespace-only input.
    if not youtube_url or not youtube_url.strip():
        return None, None

    video_id = extract_video_id(youtube_url)
    if not video_id:
        print(f"Invalid YouTube URL: {youtube_url}")
        return None, None

    video_info = None
    if YOUTUBE_API_KEY:
        video_info = get_video_info(video_id)
        if not video_info:
            # API lookup failed; fall back to a direct download below.
            print(f"Could not get video info from API for: {video_id}")
    else:
        print("No YouTube API key set, using yt-dlp directly")

    audio_path, temp_dir, duration = download_audio(video_id, video_info)

    # The temporary directory is not handed to our caller, so remove it
    # here; the audio file lives in the shared download directory instead.
    _remove_temp_dir(temp_dir)

    if video_info:
        return audio_path, video_info
    return audio_path, {"title": "Unknown", "duration": duration}


# Manual test
if __name__ == "__main__":
    # Set the API key (real code should read it from the environment or a
    # configuration file).
    api_key = "YOUR_API_KEY"
    set_api_key(api_key)

    # Test URL
    test_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"

    # Process the URL
    audio_path, video_info = process_youtube_url(test_url)

    if audio_path and video_info:
        print(f"Downloaded: {audio_path}")
        print(f"Video info: {video_info}")
    else:
        print("Failed to process YouTube URL")