Prathamesh Sarjerao Vaidya
committed on
Commit
·
7739a22
1
Parent(s):
4d857f2
made changes
Browse files
- Dockerfile +5 -5
- model_preloader.py +61 -61
- requirements.txt +1 -1
- startup.py +44 -44
Dockerfile
CHANGED
|
@@ -35,9 +35,9 @@ COPY requirements.txt .
|
|
| 35 |
# Install Python dependencies with proper error handling
|
| 36 |
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
|
| 37 |
# Install ONNX Runtime CPU version specifically
|
| 38 |
-
pip install --no-cache-dir onnxruntime==1.16.3 && \
|
| 39 |
# Fix executable stack issue
|
| 40 |
-
find /usr/local/lib/python*/site-packages/onnxruntime -name "*.so" -exec execstack -c {} \; 2>/dev/null || true && \
|
| 41 |
# Install other requirements
|
| 42 |
pip install --no-cache-dir -r requirements.txt
|
| 43 |
|
|
@@ -71,9 +71,9 @@ ENV PYTHONPATH=/app \
|
|
| 71 |
HF_HUB_CACHE=/app/model_cache \
|
| 72 |
FONTCONFIG_PATH=/tmp/fontconfig \
|
| 73 |
# Critical ONNX Runtime fixes for containers
|
| 74 |
-
ORT_DYLIB_DEFAULT_OPTIONS=DisableExecutablePageAllocator=1 \
|
| 75 |
-
ONNXRUNTIME_EXECUTION_PROVIDERS=CPUExecutionProvider \
|
| 76 |
-
ORT_DISABLE_TLS_ARENA=1 \
|
| 77 |
CTRANSLATE2_FORCE_CPU_ISA=generic \
|
| 78 |
# Threading and memory optimizations
|
| 79 |
TF_CPP_MIN_LOG_LEVEL=2 \
|
|
|
|
| 35 |
# Install Python dependencies with proper error handling
|
| 36 |
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
|
| 37 |
# Install ONNX Runtime CPU version specifically
|
| 38 |
+
# pip install --no-cache-dir onnxruntime==1.16.3 && \
|
| 39 |
# Fix executable stack issue
|
| 40 |
+
# find /usr/local/lib/python*/site-packages/onnxruntime -name "*.so" -exec execstack -c {} \; 2>/dev/null || true && \
|
| 41 |
# Install other requirements
|
| 42 |
pip install --no-cache-dir -r requirements.txt
|
| 43 |
|
|
|
|
| 71 |
HF_HUB_CACHE=/app/model_cache \
|
| 72 |
FONTCONFIG_PATH=/tmp/fontconfig \
|
| 73 |
# Critical ONNX Runtime fixes for containers
|
| 74 |
+
# ORT_DYLIB_DEFAULT_OPTIONS=DisableExecutablePageAllocator=1 \
|
| 75 |
+
# ONNXRUNTIME_EXECUTION_PROVIDERS=CPUExecutionProvider \
|
| 76 |
+
# ORT_DISABLE_TLS_ARENA=1 \
|
| 77 |
CTRANSLATE2_FORCE_CPU_ISA=generic \
|
| 78 |
# Threading and memory optimizations
|
| 79 |
TF_CPP_MIN_LOG_LEVEL=2 \
|
model_preloader.py
CHANGED
|
@@ -30,30 +30,40 @@ from rich.text import Text
|
|
| 30 |
import psutil
|
| 31 |
|
| 32 |
# CRITICAL: Configure ONNX Runtime BEFORE any ML library imports
|
| 33 |
-
import os
|
| 34 |
-
os.environ.update({
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
})
|
| 45 |
|
| 46 |
# Import ONNX Runtime with error suppression
|
| 47 |
-
try:
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
except ImportError:
|
| 54 |
-
|
| 55 |
-
except Exception as e:
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
# Add src directory to path
|
| 59 |
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
|
@@ -64,16 +74,6 @@ logger = logging.getLogger(__name__)
|
|
| 64 |
|
| 65 |
console = Console()
|
| 66 |
|
| 67 |
-
# CRITICAL: Set environment variables BEFORE importing any ML libraries
|
| 68 |
-
# This fixes the ONNX Runtime executable stack issue in containers
|
| 69 |
-
os.environ.update({
|
| 70 |
-
'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
|
| 71 |
-
'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
|
| 72 |
-
'OMP_NUM_THREADS': '1',
|
| 73 |
-
'TF_ENABLE_ONEDNN_OPTS': '0',
|
| 74 |
-
'TOKENIZERS_PARALLELISM': 'false'
|
| 75 |
-
})
|
| 76 |
-
|
| 77 |
class ModelPreloader:
|
| 78 |
"""Comprehensive model preloader with enhanced local cache detection."""
|
| 79 |
|
|
@@ -397,20 +397,20 @@ class ModelPreloader:
|
|
| 397 |
except Exception as e:
|
| 398 |
logger.warning(f"Error saving cache for {model_key}: {e}")
|
| 399 |
|
| 400 |
-
def load_pyannote_pipeline(self) -> Optional[Pipeline]:
|
| 401 |
"""Load pyannote speaker diarization pipeline with container-safe settings."""
|
| 402 |
try:
|
| 403 |
console.print(f"[yellow]Loading pyannote.audio pipeline...[/yellow]")
|
| 404 |
|
| 405 |
# Fix ONNX Runtime libraries first
|
| 406 |
-
try:
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
except:
|
| 413 |
-
|
| 414 |
|
| 415 |
# Check for HuggingFace token
|
| 416 |
hf_token = os.getenv('HUGGINGFACE_TOKEN') or os.getenv('HF_TOKEN')
|
|
@@ -429,7 +429,7 @@ class ModelPreloader:
|
|
| 429 |
os.environ['ORT_LOGGING_LEVEL'] = '3' # ERROR only
|
| 430 |
|
| 431 |
# Disable other verbose logging
|
| 432 |
-
logging.getLogger('onnxruntime').setLevel(logging.ERROR)
|
| 433 |
logging.getLogger('transformers').setLevel(logging.ERROR)
|
| 434 |
|
| 435 |
try:
|
|
@@ -453,28 +453,28 @@ class ModelPreloader:
|
|
| 453 |
warnings.filters[:] = old_warning_filters
|
| 454 |
|
| 455 |
except Exception as e:
|
| 456 |
-
error_msg = str(e).lower()
|
| 457 |
-
if "executable stack" in error_msg or "onnxruntime" in error_msg:
|
| 458 |
-
|
| 459 |
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
else:
|
| 477 |
-
|
| 478 |
|
| 479 |
logger.error(f"Pyannote loading failed: {e}")
|
| 480 |
return None
|
|
|
|
| 30 |
import psutil
|
| 31 |
|
| 32 |
# CRITICAL: Configure ONNX Runtime BEFORE any ML library imports
|
| 33 |
+
# import os
|
| 34 |
+
# os.environ.update({
|
| 35 |
+
# 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
|
| 36 |
+
# 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
|
| 37 |
+
# 'ORT_DISABLE_TLS_ARENA': '1',
|
| 38 |
+
# 'OMP_NUM_THREADS': '1',
|
| 39 |
+
# 'MKL_NUM_THREADS': '1',
|
| 40 |
+
# 'NUMBA_NUM_THREADS': '1',
|
| 41 |
+
# 'TF_ENABLE_ONEDNN_OPTS': '0',
|
| 42 |
+
# 'TOKENIZERS_PARALLELISM': 'false',
|
| 43 |
+
# 'MALLOC_ARENA_MAX': '2'
|
| 44 |
+
# })
|
| 45 |
|
| 46 |
# Import ONNX Runtime with error suppression
|
| 47 |
+
# try:
|
| 48 |
+
# import warnings
|
| 49 |
+
# warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
|
| 50 |
+
# import onnxruntime as ort
|
| 51 |
+
# # Force CPU provider only
|
| 52 |
+
# ort.set_default_logger_severity(3) # ERROR level only
|
| 53 |
+
# except ImportError:
|
| 54 |
+
# pass
|
| 55 |
+
# except Exception as e:
|
| 56 |
+
# print(f"ONNX Runtime warning (expected in containers): {e}")
|
| 57 |
+
|
| 58 |
+
# CRITICAL: Set environment variables BEFORE importing any ML libraries
|
| 59 |
+
# This fixes the ONNX Runtime executable stack issue in containers
|
| 60 |
+
# os.environ.update({
|
| 61 |
+
# 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
|
| 62 |
+
# 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
|
| 63 |
+
# 'OMP_NUM_THREADS': '1',
|
| 64 |
+
# 'TF_ENABLE_ONEDNN_OPTS': '0',
|
| 65 |
+
# 'TOKENIZERS_PARALLELISM': 'false'
|
| 66 |
+
# })
|
| 67 |
|
| 68 |
# Add src directory to path
|
| 69 |
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
|
|
|
| 74 |
|
| 75 |
console = Console()
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
class ModelPreloader:
|
| 78 |
"""Comprehensive model preloader with enhanced local cache detection."""
|
| 79 |
|
|
|
|
| 397 |
except Exception as e:
|
| 398 |
logger.warning(f"Error saving cache for {model_key}: {e}")
|
| 399 |
|
| 400 |
+
def load_pyannote_pipeline(self, task_id: str) -> Optional[Pipeline]:
|
| 401 |
"""Load pyannote speaker diarization pipeline with container-safe settings."""
|
| 402 |
try:
|
| 403 |
console.print(f"[yellow]Loading pyannote.audio pipeline...[/yellow]")
|
| 404 |
|
| 405 |
# Fix ONNX Runtime libraries first
|
| 406 |
+
# try:
|
| 407 |
+
# import subprocess
|
| 408 |
+
# subprocess.run([
|
| 409 |
+
# 'find', '/usr/local/lib/python*/site-packages/onnxruntime',
|
| 410 |
+
# '-name', '*.so', '-exec', 'execstack', '-c', '{}', ';'
|
| 411 |
+
# ], capture_output=True, timeout=10, stderr=subprocess.DEVNULL)
|
| 412 |
+
# except:
|
| 413 |
+
# pass
|
| 414 |
|
| 415 |
# Check for HuggingFace token
|
| 416 |
hf_token = os.getenv('HUGGINGFACE_TOKEN') or os.getenv('HF_TOKEN')
|
|
|
|
| 429 |
os.environ['ORT_LOGGING_LEVEL'] = '3' # ERROR only
|
| 430 |
|
| 431 |
# Disable other verbose logging
|
| 432 |
+
# logging.getLogger('onnxruntime').setLevel(logging.ERROR)
|
| 433 |
logging.getLogger('transformers').setLevel(logging.ERROR)
|
| 434 |
|
| 435 |
try:
|
|
|
|
| 453 |
warnings.filters[:] = old_warning_filters
|
| 454 |
|
| 455 |
except Exception as e:
|
| 456 |
+
# error_msg = str(e).lower()
|
| 457 |
+
# if "executable stack" in error_msg or "onnxruntime" in error_msg:
|
| 458 |
+
# console.print("[yellow]ONNX Runtime container warning (attempting workaround)...[/yellow]")
|
| 459 |
|
| 460 |
+
# # Try alternative approach - load without ONNX-dependent components
|
| 461 |
+
# try:
|
| 462 |
+
# # Try loading with CPU-only execution providers
|
| 463 |
+
# import onnxruntime as ort
|
| 464 |
+
# ort.set_default_logger_severity(4) # FATAL only
|
| 465 |
|
| 466 |
+
# pipeline = Pipeline.from_pretrained(
|
| 467 |
+
# "pyannote/speaker-diarization-3.1",
|
| 468 |
+
# use_auth_token=hf_token,
|
| 469 |
+
# cache_dir=str(self.cache_dir / "pyannote")
|
| 470 |
+
# )
|
| 471 |
+
# console.print(f"[green]SUCCESS: pyannote.audio loaded with workaround[/green]")
|
| 472 |
+
# return pipeline
|
| 473 |
|
| 474 |
+
# except Exception as e2:
|
| 475 |
+
# console.print(f"[red]ERROR: All pyannote loading methods failed: {e2}[/red]")
|
| 476 |
+
# else:
|
| 477 |
+
# console.print(f"[red]ERROR: Failed to load pyannote.audio pipeline: {e}[/red]")
|
| 478 |
|
| 479 |
logger.error(f"Pyannote loading failed: {e}")
|
| 480 |
return None
|
requirements.txt
CHANGED
|
@@ -4,7 +4,7 @@ torchaudio==2.0.2
|
|
| 4 |
torchvision==0.15.2
|
| 5 |
|
| 6 |
# Keep regular ONNX Runtime with container-safe environment variables
|
| 7 |
-
onnxruntime==1.15.1
|
| 8 |
|
| 9 |
# Audio processing
|
| 10 |
pyannote.audio==3.1.1
|
|
|
|
| 4 |
torchvision==0.15.2
|
| 5 |
|
| 6 |
# Keep regular ONNX Runtime with container-safe environment variables
|
| 7 |
+
# onnxruntime==1.15.1
|
| 8 |
|
| 9 |
# Audio processing
|
| 10 |
pyannote.audio==3.1.1
|
startup.py
CHANGED
|
@@ -5,21 +5,21 @@ Handles model preloading and graceful fallbacks for containerized environments.
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
# Suppress ONNX Runtime warnings BEFORE any imports
|
| 8 |
-
import warnings
|
| 9 |
-
warnings.filterwarnings("ignore", message=".*executable stack.*")
|
| 10 |
-
warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
|
| 11 |
|
| 12 |
import os
|
| 13 |
import subprocess
|
| 14 |
import sys
|
| 15 |
import logging
|
| 16 |
|
| 17 |
-
# Set critical environment variables immediately
|
| 18 |
-
os.environ.update({
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
})
|
| 23 |
|
| 24 |
# Configure logging
|
| 25 |
logging.basicConfig(
|
|
@@ -71,39 +71,39 @@ def preload_models():
|
|
| 71 |
logger.info('✅ Model preloader module found')
|
| 72 |
|
| 73 |
# Set comprehensive environment variables for ONNX Runtime
|
| 74 |
-
env = os.environ.copy()
|
| 75 |
-
env.update({
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
})
|
| 89 |
|
| 90 |
-
# Try to fix ONNX Runtime libraries before running preloader
|
| 91 |
-
try:
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
except:
|
| 98 |
-
|
| 99 |
|
| 100 |
# Try to run the preloader
|
| 101 |
result = subprocess.run(
|
| 102 |
['python', 'model_preloader.py'],
|
| 103 |
capture_output=True,
|
| 104 |
text=True,
|
| 105 |
-
timeout=300
|
| 106 |
-
env=env
|
| 107 |
)
|
| 108 |
|
| 109 |
if result.returncode == 0:
|
|
@@ -113,15 +113,15 @@ def preload_models():
|
|
| 113 |
return True
|
| 114 |
else:
|
| 115 |
logger.warning(f'⚠️ Model preloading failed with return code {result.returncode}')
|
| 116 |
-
if result.stderr:
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
return False
|
| 126 |
|
| 127 |
except subprocess.TimeoutExpired:
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
# Suppress ONNX Runtime warnings BEFORE any imports
|
| 8 |
+
# import warnings
|
| 9 |
+
# warnings.filterwarnings("ignore", message=".*executable stack.*")
|
| 10 |
+
# warnings.filterwarnings("ignore", category=UserWarning, module="onnxruntime")
|
| 11 |
|
| 12 |
import os
|
| 13 |
import subprocess
|
| 14 |
import sys
|
| 15 |
import logging
|
| 16 |
|
| 17 |
+
# # Set critical environment variables immediately
|
| 18 |
+
# os.environ.update({
|
| 19 |
+
# 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
|
| 20 |
+
# 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
|
| 21 |
+
# 'ORT_DISABLE_TLS_ARENA': '1'
|
| 22 |
+
# })
|
| 23 |
|
| 24 |
# Configure logging
|
| 25 |
logging.basicConfig(
|
|
|
|
| 71 |
logger.info('✅ Model preloader module found')
|
| 72 |
|
| 73 |
# Set comprehensive environment variables for ONNX Runtime
|
| 74 |
+
# env = os.environ.copy()
|
| 75 |
+
# env.update({
|
| 76 |
+
# 'ORT_DYLIB_DEFAULT_OPTIONS': 'DisableExecutablePageAllocator=1',
|
| 77 |
+
# 'ONNXRUNTIME_EXECUTION_PROVIDERS': 'CPUExecutionProvider',
|
| 78 |
+
# 'ORT_DISABLE_TLS_ARENA': '1',
|
| 79 |
+
# 'TF_ENABLE_ONEDNN_OPTS': '0',
|
| 80 |
+
# 'OMP_NUM_THREADS': '1',
|
| 81 |
+
# 'MKL_NUM_THREADS': '1',
|
| 82 |
+
# 'NUMBA_NUM_THREADS': '1',
|
| 83 |
+
# 'TOKENIZERS_PARALLELISM': 'false',
|
| 84 |
+
# 'MALLOC_ARENA_MAX': '2',
|
| 85 |
+
# # Additional ONNX Runtime fixes
|
| 86 |
+
# 'ONNXRUNTIME_LOG_SEVERITY_LEVEL': '3',
|
| 87 |
+
# 'ORT_LOGGING_LEVEL': 'WARNING'
|
| 88 |
+
# })
|
| 89 |
|
| 90 |
+
# # Try to fix ONNX Runtime libraries before running preloader
|
| 91 |
+
# try:
|
| 92 |
+
# import subprocess
|
| 93 |
+
# subprocess.run([
|
| 94 |
+
# 'find', '/usr/local/lib/python*/site-packages/onnxruntime',
|
| 95 |
+
# '-name', '*.so', '-exec', 'execstack', '-c', '{}', ';'
|
| 96 |
+
# ], capture_output=True, timeout=30)
|
| 97 |
+
# except:
|
| 98 |
+
# pass # Continue if execstack fix fails
|
| 99 |
|
| 100 |
# Try to run the preloader
|
| 101 |
result = subprocess.run(
|
| 102 |
['python', 'model_preloader.py'],
|
| 103 |
capture_output=True,
|
| 104 |
text=True,
|
| 105 |
+
timeout=300 # 5 minute timeout
|
| 106 |
+
# env=env
|
| 107 |
)
|
| 108 |
|
| 109 |
if result.returncode == 0:
|
|
|
|
| 113 |
return True
|
| 114 |
else:
|
| 115 |
logger.warning(f'⚠️ Model preloading failed with return code {result.returncode}')
|
| 116 |
+
# if result.stderr:
|
| 117 |
+
# # Filter out expected ONNX warnings
|
| 118 |
+
# stderr_lines = result.stderr.split('\n')
|
| 119 |
+
# important_errors = [line for line in stderr_lines
|
| 120 |
+
# if 'executable stack' not in line.lower()
|
| 121 |
+
# and 'onnxruntime' not in line.lower()
|
| 122 |
+
# and line.strip()]
|
| 123 |
+
# if important_errors:
|
| 124 |
+
# logger.warning(f'Important errors: {important_errors[:3]}')
|
| 125 |
return False
|
| 126 |
|
| 127 |
except subprocess.TimeoutExpired:
|