|
|
|
""" |
|
Working Whisper Transcription for Apple M3 Ultra (CPU Version) |
|
Fixes MPS compatibility issues by using CPU |
|
""" |
|
|
|
import json
import logging
import os
import subprocess
import sys
import time
from pathlib import Path

import torch
import whisper
|
|
|
# Configure the root logger once: INFO level, timestamped records.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)
|
|
|
def check_environment():
    """Log interpreter, library and accelerator details, then pick the device.

    MPS and CUDA availability are probed and logged for diagnostics only;
    the function always selects the CPU.

    Returns:
        The literal string "cpu".
    """
    logger.info("๐ Checking environment...")

    # Versions of everything the transcription depends on.
    logger.info(f"๐ Python version: {sys.version}")
    logger.info(f"๐ฅ PyTorch version: {torch.__version__}")
    logger.info(f"๐ค Whisper version: {whisper.__version__}")

    # Guard against PyTorch builds that do not expose an `mps` backend.
    mps_backend = getattr(torch.backends, 'mps', None)
    mps_available = mps_backend is not None and mps_backend.is_available()
    logger.info(f"๐ MPS available: {mps_available}")

    cuda_available = torch.cuda.is_available()
    logger.info(f"๐ฎ CUDA available: {cuda_available}")

    # Accelerators are reported above but deliberately not used.
    logger.info("๐ก Using CPU for stable transcription (MPS has compatibility issues)")
    return "cpu"
|
|
|
def transcribe_with_cpu(audio_file, model_size="medium", language="en",
                        download_root="./whisper_models"):
    """Transcribe an audio file with Whisper on the CPU.

    Args:
        audio_file: Path of the audio file handed to ``model.transcribe``.
        model_size: Model name passed to ``whisper.load_model``.
        language: Language code passed to ``model.transcribe``
            (defaults to "en").
        download_root: Passed as ``download_root`` to ``whisper.load_model``
            (defaults to "./whisper_models").

    Returns:
        A ``(result, elapsed_seconds)`` tuple.  ``result`` is whatever
        ``model.transcribe`` returned; ``elapsed_seconds`` covers both model
        loading and transcription.  On any failure the error is logged with
        its traceback and ``(None, 0)`` is returned instead of raising.
    """
    logger.info(f"๐ง Transcribing: {audio_file}")
    logger.info(f"๐ค Using model: {model_size}")
    logger.info("โก Device: CPU (stable mode)")

    try:
        # perf_counter is monotonic, so the duration cannot be skewed by
        # system clock adjustments during a long run.
        start_time = time.perf_counter()

        model = whisper.load_model(
            model_size,
            device="cpu",
            download_root=download_root,
        )

        logger.info("๐ค Starting transcription...")
        result = model.transcribe(
            audio_file,
            verbose=True,
            fp16=False,  # run in full precision on the CPU
            temperature=0.0,
            best_of=1,
            language=language,
        )

        return result, time.perf_counter() - start_time

    except Exception as e:
        # Best-effort boundary: callers test the result for truthiness.
        # logger.exception keeps the traceback that logger.error dropped.
        logger.exception(f"โ Transcription failed: {e}")
        return None, 0
|
|
|
def estimate_time(audio_file, model_size):
    """Estimate transcription time from the audio file's size on disk.

    The estimate is ``size_in_MB * seconds_per_MB`` using a rough,
    hard-coded rate per model family.  Variant names such as ``"small.en"``
    or ``"large-v3"`` are mapped to their family (``"small"``, ``"large"``),
    case-insensitively; unrecognised names fall back to the ``"medium"`` rate.

    Args:
        audio_file: Path of the audio file; only its size is read.
        model_size: Whisper model name.

    Returns:
        The estimate formatted as ``"M:SS"`` (minutes are not capped at 59).

    Raises:
        OSError: If the file's size cannot be read (e.g. it does not exist).
    """
    seconds_per_mb = {
        'tiny': 1.5,
        'base': 2.0,
        'small': 3.0,
        'medium': 4.5,
        'large': 6.0,
    }

    # Reduce a variant name to its family: "small.en" -> "small",
    # "large-v3" -> "large".
    family = str(model_size).lower().split('.')[0].split('-')[0]
    rate = seconds_per_mb.get(family, seconds_per_mb['medium'])

    file_size_mb = os.path.getsize(audio_file) / (1024 * 1024)
    minutes, seconds = divmod(int(file_size_mb * rate), 60)

    return f"{minutes}:{seconds:02d}"
|
|
|
def main(audio_file="yuval_harari_lecture.mp3", model_size="medium"):
    """Transcribe one audio file on the CPU and save the transcript.

    Logs the file size, a rough time estimate and environment details, runs
    the transcription, writes ``<stem>_transcript.txt`` to the current
    working directory, logs a short summary and preview, and finally hands
    the result to ``save_additional_formats``.

    Args:
        audio_file: Path of the audio file to transcribe.
        model_size: Whisper model name used for both the estimate and the
            transcription.

    Returns:
        None.  A missing input file or a failed transcription is logged and
        the function returns early.
    """
    audio_path = Path(audio_file)

    if not audio_path.exists():
        logger.error(f"โ Audio file not found: {audio_file}")
        logger.info("๐ก Run: python comprehensive_yt_dl.py to download the lecture")
        return

    file_size_mb = os.path.getsize(audio_file) / (1024 * 1024)
    logger.info(f"๐ File size: {file_size_mb:.1f} MB")

    estimated_time = estimate_time(audio_file, model_size)
    logger.info(f"โฑ๏ธ Estimated time: ~{estimated_time}")

    # Called for its diagnostic logging only; its return value is not needed
    # because transcribe_with_cpu always runs on the CPU.
    check_environment()

    logger.info("๐ Starting transcription...")
    logger.info("โ ๏ธ This may take a while on CPU - be patient!")

    result, duration = transcribe_with_cpu(audio_file, model_size)

    if not result:
        logger.error("โ Transcription failed completely")
        return

    text = result['text']

    output_file = f"{audio_path.stem}_transcript.txt"
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(text)

    minutes, seconds = divmod(int(duration), 60)

    logger.info(f"✅ Transcription completed in {minutes}:{seconds:02d}")
    logger.info(f"๐ Saved to: {output_file}")
    logger.info(f"๐ Word count: {len(text.split()):,}")

    # Only the first 500 characters are shown; longer texts get an ellipsis.
    preview = text[:500] + "..." if len(text) > 500 else text
    logger.info(f"๐ Preview:\n{preview}")

    save_additional_formats(audio_path.stem, result)
|
|
|
def _srt_timestamp(seconds):
    """Format a time offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, rest = divmod(total_ms, 3600000)
    minutes, rest = divmod(rest, 60000)
    secs, millis = divmod(rest, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def _segments_to_srt(segments):
    """Render segment dicts (``start``, ``end``, ``text``) as SRT text."""
    cues = []
    for index, segment in enumerate(segments, start=1):
        start = _srt_timestamp(segment['start'])
        end = _srt_timestamp(segment['end'])
        # "-->" separates the two timestamps of a cue, so it must not
        # appear inside the cue text itself.
        text = str(segment['text']).strip().replace('-->', '->')
        cues.append(f"{index}\n{start} --> {end}\n{text}\n")
    return "\n".join(cues)


def save_additional_formats(base_name, result):
    """Save the transcription result as JSON and as SRT subtitles.

    Both exports are best-effort: a failure is logged as a warning and never
    propagates to the caller.

    Args:
        base_name: File name stem; output goes to
            ``<base_name>_timestamps.json`` and ``<base_name>.srt``.
        result: Transcription result dict.  It is dumped to JSON as-is, and
            its ``"segments"`` list (dicts with ``start``, ``end`` and
            ``text``) is used to build the subtitles.

    Returns:
        None.
    """
    json_path = f"{base_name}_timestamps.json"
    try:
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=2, ensure_ascii=False)
    except Exception as e:
        logger.warning(f"โ ๏ธ Could not save JSON: {e}")
    else:
        logger.info(f"โฐ Timestamps saved to: {json_path}")

    # Build the subtitles from the segments already in `result` instead of
    # re-running the whole transcription in a subprocess.
    srt_path = f"{base_name}.srt"
    try:
        srt_text = _segments_to_srt(result['segments'])
        with open(srt_path, 'w', encoding='utf-8') as f:
            f.write(srt_text)
    except Exception as e:
        logger.warning(f"Could not save subtitles: {e}")
    else:
        logger.info(f"๐ฌ Subtitles saved to: {base_name}.srt")
|
|
|
# Run the transcription only when executed as a script, not when imported.
if __name__ == "__main__":
    main()