MLX_GPT_OSS_120B / whisper /yt_dl_harari_lecture.py
TroglodyteDerivations's picture
Upload 48 files
c28358e verified
raw
history blame
5.27 kB
#!/usr/bin/env python3
"""
Comprehensive Yuval Noah Harari Lecture Downloader with FFmpeg check
"""
import subprocess
import sys
import os
import platform
from pathlib import Path
import logging
# Configure logging once at import time: INFO level and above, with each
# record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger used by the functions in this script.
logger = logging.getLogger(__name__)
def check_ffmpeg_installed():
    """Report whether an ``ffmpeg`` executable can be run from PATH.

    Runs ``ffmpeg -version`` with a 10-second limit.

    Returns:
        bool: True when the command exits with status 0. False when it exits
        with a non-zero status, times out, or cannot be launched at all
        (missing binary, file without execute permission, or any other
        OS-level launch failure).
    """
    try:
        result = subprocess.run(
            ["ffmpeg", "-version"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers FileNotFoundError as well as PermissionError and
        # similar launch failures; every one of them means "not usable".
        return False
    return result.returncode == 0
def install_ffmpeg():
    """Install ffmpeg by running ``brew install ffmpeg``.

    The command is given up to 300 seconds to finish. Progress and failures
    are reported through the module logger.

    Returns:
        bool: True when brew exits with status 0. False when brew exits with
        a non-zero status, the ``brew`` executable cannot be found, or the
        time limit is exceeded.
    """
    logger.info("📦 Installing ffmpeg via Homebrew...")
    try:
        result = subprocess.run(
            ["brew", "install", "ffmpeg"],
            capture_output=True,
            text=True,
            timeout=300,
        )
    except subprocess.TimeoutExpired:
        # brew was found but ran too long; suggesting the Homebrew installer
        # here would be misleading, so point at the manual command instead.
        logger.error("❌ ffmpeg installation timed out")
        logger.info("💡 You can install ffmpeg manually: brew install ffmpeg")
        return False
    except FileNotFoundError:
        logger.error("❌ Homebrew not found")
        logger.info(
            '💡 Install Homebrew first: /bin/bash -c "$(curl -fsSL '
            'https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"'
        )
        return False

    if result.returncode != 0:
        logger.error("❌ Failed to install ffmpeg: %s", result.stderr)
        logger.info("💡 You can install ffmpeg manually: brew install ffmpeg")
        return False

    logger.info("✅ ffmpeg installed successfully")
    return True
def check_yt_dlp_installed():
    """Report whether the yt-dlp module can be run by this interpreter.

    Runs ``<sys.executable> -m yt_dlp --version`` with a 10-second limit.

    Returns:
        bool: True when the command exits with status 0; False when it exits
        with a non-zero status, times out, or the executable is not found.
    """
    version_cmd = [sys.executable, "-m", "yt_dlp", "--version"]
    try:
        probe = subprocess.run(
            version_cmd,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError):
        return False
    return probe.returncode == 0
def install_yt_dlp():
    """Install or upgrade yt-dlp with pip for the running interpreter.

    Runs ``<sys.executable> -m pip install --upgrade yt-dlp`` with a
    300-second limit and reports the outcome through the module logger.

    Returns:
        bool: True when pip exits with status 0; False when pip exits with a
        non-zero status or the time limit is exceeded.
    """
    logger.info("📦 Installing yt-dlp...")
    try:
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "--upgrade", "yt-dlp"],
            capture_output=True,
            text=True,
            timeout=300,
        )
    except subprocess.TimeoutExpired:
        logger.error("❌ Installation timed out")
        return False

    if result.returncode != 0:
        logger.error("❌ Failed to install yt-dlp: %s", result.stderr)
        return False

    logger.info("✅ yt-dlp installed successfully")
    return True
def download_lecture():
    """Download the Yuval Noah Harari lecture audio as an MP3 via yt-dlp.

    Invokes ``<sys.executable> -m yt_dlp`` with audio extraction, MP3
    conversion at quality 0, and ``--no-overwrites``, allowing up to 3600
    seconds. The output template places the file in the current working
    directory as ``yuval_harari_lecture.<ext>``.

    Returns:
        bool: True when yt-dlp exits with status 0; False when it exits with
        a non-zero status or the time limit is exceeded.
    """
    lecture_url = "https://www.youtube.com/watch?v=0BnZMeFtoAM"
    output_stem = "yuval_harari_lecture"
    output_template = output_stem + ".%(ext)s"
    mp3_name = output_stem + ".mp3"

    logger.info("🎧 Downloading Yuval Noah Harari lecture...")
    logger.info("📺 URL: %s", lecture_url)

    command = [
        sys.executable, "-m", "yt_dlp",
        "-x",                        # Extract audio
        "--audio-format", "mp3",     # Convert to MP3
        "--audio-quality", "0",      # Best quality
        "--output", output_template,
        "--no-overwrites",           # Don't re-download if file exists
        lecture_url,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, timeout=3600)
    except subprocess.TimeoutExpired:
        logger.error("❌ Download timed out")
        return False

    if result.returncode != 0:
        logger.error("❌ Download failed: %s", result.stderr)
        return False

    logger.info("✅ Download completed successfully!")
    if os.path.exists(mp3_name):
        size_mb = os.path.getsize(mp3_name) / (1024 * 1024)
        logger.info("📁 File: %s", mp3_name)
        logger.info("📊 Size: %.1f MB", size_mb)
    else:
        # yt-dlp reported success but the expected file is absent; surface it
        # instead of staying silent so the user knows where to look.
        logger.warning("⚠️ Expected output file not found: %s", mp3_name)
    return True
def main():
    """Run the whole workflow: ensure ffmpeg and yt-dlp, then download.

    Steps, in order:
      1. If ffmpeg is not available, try to install it.
      2. If yt-dlp is not available, try to install it.
      3. Download the lecture.

    Returns:
        int: 0 when every step succeeds; 1 as soon as any step fails.
    """
    banner = "=" * 60
    logger.info(banner)
    logger.info("🎓 Yuval Noah Harari Lecture Downloader")
    logger.info(banner)

    # ffmpeg is needed for the audio conversion step.
    if not check_ffmpeg_installed():
        logger.warning("⚠️ ffmpeg not found - required for audio conversion")
        if not install_ffmpeg():
            logger.error("❌ Please install ffmpeg manually: brew install ffmpeg")
            return 1

    if check_yt_dlp_installed():
        logger.info("✅ yt-dlp is already installed")
    else:
        logger.info("yt-dlp not found, installing...")
        if not install_yt_dlp():
            return 1

    if not download_lecture():
        return 1

    logger.info(banner)
    logger.info("🎉 Download process completed!")
    logger.info("💡 You can now use the audio file for transcription with Whisper")
    logger.info(banner)
    return 0
if __name__ == "__main__":
    # Script entry point: propagate main()'s status as the process exit code.
    # sys.exit is used instead of the site-provided exit(), which is an
    # interactive-shell helper and is not defined when the site module is
    # skipped (e.g. `python -S`).
    try:
        sys.exit(main())
    except KeyboardInterrupt:
        # Ctrl-C: report the cancellation and exit with a failure status.
        logger.info("\n⏹️ Operation cancelled by user")
        sys.exit(1)