TroglodyteDerivations's picture
Upload 48 files
c28358e verified
raw
history blame
3.36 kB
#!/usr/bin/env python3
"""
Corrected verification for GPT-OSS-120B-MXFP4-Q4 model
"""
import os
import json
import logging
from pathlib import Path
from transformers import AutoConfig, AutoTokenizer
import mlx.core as mx
# Configure root logging once at import time; module-level logger per stdlib convention.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
def verify_model_corrected(model_path):
    """Verify a local GPT-OSS-120B-MXFP4-Q4 model directory.

    Checks that the expected metadata files are present, that the HF config
    loads, and that the tokenizer loads and can tokenize a sample string.
    Config/tokenizer failures are logged as warnings, not raised.

    Args:
        model_path: Path (str or Path) to the local model directory.

    Returns:
        The loaded tokenizer on success, or None if tokenizer loading fails.
    """
    logger.info("πŸ” Corrected verification for GPT-OSS-120B-MXFP4-Q4...")
    model_dir = Path(model_path)

    # Count the weight shards actually present.
    model_files = list(model_dir.glob("*.safetensors"))
    logger.info(f"Found {len(model_files)} safetensors files")

    # Metadata files required for the model to be loadable.
    required_files = [
        "config.json",
        "tokenizer.json",
        "tokenizer_config.json",
        "model.safetensors.index.json",
        "generation_config.json",
    ]
    # Use pathlib consistently (rest of the file uses Path already).
    missing_files = [name for name in required_files if not (model_dir / name).exists()]
    if missing_files:
        logger.warning(f"⚠️ Missing files: {missing_files}")
    else:
        logger.info("βœ… All required files present")

    # Load config — non-fatal: verification continues even if this fails.
    try:
        config = AutoConfig.from_pretrained(model_path)
        logger.info("βœ… Config loaded successfully")
        logger.info(f" Architecture: {config.architectures[0] if config.architectures else 'N/A'}")
        logger.info(f" Vocab size: {config.vocab_size:,}")
        logger.info(f" Hidden size: {config.hidden_size}")
        logger.info(f" Num layers: {config.num_hidden_layers}")
        logger.info(f" Model type: {config.model_type}")
    except Exception as e:
        logger.warning(f"⚠️ Could not load config: {e}")

    # Load tokenizer and smoke-test it on a short prompt.
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        logger.info("βœ… Tokenizer loaded successfully")
        test_text = "The capital of France is"
        inputs = tokenizer(test_text, return_tensors="np")
        logger.info("πŸ“ Tokenization test successful")
        logger.info(f" Input shape: {inputs['input_ids'].shape}")
        logger.info(f" Input tokens: {inputs['input_ids'][0]}")
        return tokenizer
    except Exception as e:
        logger.warning(f"⚠️ Tokenizer loading failed: {e}")
        return None
def check_disk_usage(model_path):
    """Log and return the on-disk size of the downloaded model.

    Args:
        model_path: Directory whose contents are summed recursively.

    Returns:
        Total size of all files under model_path, in GiB (float).
    """
    # Single-pass generator sum; directories are skipped by is_file().
    total_size = sum(
        p.stat().st_size for p in Path(model_path).rglob('*') if p.is_file()
    )
    size_gb = total_size / (1024 ** 3)
    logger.info(f"πŸ’Ύ Total model size: {size_gb:.2f} GB")
    return size_gb
def main():
    """Run the disk-usage check and model verification for the local model copy."""
    # NOTE(review): hard-coded local path — adjust for other machines.
    model_path = "/Users/martinrivera/mlx-gpt-oss-120b/my_model"
    logger.info("=" * 60)
    logger.info("πŸ€— GPT-OSS-120B-MXFP4-Q4 Verification")
    logger.info("=" * 60)
    # Report how much disk the download consumed.
    check_disk_usage(model_path)
    # Verify files/config/tokenizer; return value is only logged here.
    verify_model_corrected(model_path)
    logger.info("βœ… Model verification completed!")
    logger.info("πŸ’‘ This model uses .safetensors format, not .npz")
    logger.info(" You can use it with transformers or convert to MLX format")


if __name__ == "__main__":
    main()