# Source: MLX_GPT_OSS_120B / mlx-gpt-oss-120b / optimized_performance_monitor.py
# Uploaded by TroglodyteDerivations ("Upload 48 files", commit c28358e, verified), 2.34 kB
#!/usr/bin/env python3
"""
Performance monitor for GPT-OSS-120B
"""
import time
from mlx_lm import load, generate
import psutil
import logging
# Configure root logging once at import time: INFO level, timestamped records.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-scoped logger used by every function in this script.
logger = logging.getLogger(__name__)
def _count_tokens(tokenizer, text):
    """Return the number of tokens in *text*.

    Prefers the tokenizer's own ``encode`` so the reported throughput is
    based on real token counts. Falls back to the rough ``words * 1.3``
    heuristic only when the tokenizer exposes no usable ``encode``.
    """
    encode = getattr(tokenizer, "encode", None)
    if callable(encode):
        try:
            # Exclude BOS/EOS-style markers so only generated text is counted.
            return len(encode(text, add_special_tokens=False))
        except TypeError:
            # Tokenizer does not accept the keyword; retry with defaults.
            return len(encode(text))
    return len(text.split()) * 1.3


def _process_rss_gb():
    """Return the current resident set size of this process in GiB."""
    return psutil.Process().memory_info().rss / (1024**3)


def monitor_performance(
    model_path="mlx-community/gpt-oss-120b-MXFP4-Q4",
    prompts=None,
    max_tokens=50,
):
    """Load a model, run a few prompts, and log timing and memory figures.

    Args:
        model_path: Identifier passed to ``mlx_lm.load``. Defaults to the
            GPT-OSS-120B checkpoint this script was written for.
        prompts: Iterable of prompt strings to generate from. ``None``
            selects the built-in set of five short prompts.
        max_tokens: Generation cap forwarded to ``mlx_lm.generate``.

    Returns:
        None. All results are emitted through the module logger.
    """
    if prompts is None:
        prompts = [
            "Hello, how are you?",
            "Explain machine learning",
            "What is the meaning of life?",
            "Write a haiku about technology",
            "Describe quantum physics",
        ]

    logger.info("📊 GPT-OSS-120B Performance Monitor")
    logger.info("=" * 50)

    # Check system resources
    ram = psutil.virtual_memory()
    logger.info("💾 System RAM: %.1fGB available", ram.available / (1024**3))

    # Load model and time it. perf_counter is monotonic, so the interval
    # cannot be skewed by wall-clock adjustments.
    load_start = time.perf_counter()
    model, tokenizer = load(model_path)
    load_time = time.perf_counter() - load_start
    logger.info("⏱️ Model load time: %.2fs", load_time)

    # Track the highest RSS observed at the sampling points below. This is a
    # lower bound on the true peak, since memory is not sampled continuously.
    # NOTE(review): on unified-memory machines RSS may not include
    # accelerator allocations — confirm against MLX's own memory counters.
    peak_rss_gb = _process_rss_gb()

    total_tokens = 0
    total_time = 0.0

    for index, prompt in enumerate(prompts, start=1):
        logger.info("\n🧪 Test %d: %s", index, prompt)

        # Time generation
        gen_start = time.perf_counter()
        response = generate(
            model,
            tokenizer,
            prompt=prompt,
            max_tokens=max_tokens,
            verbose=False,
        )
        gen_time = time.perf_counter() - gen_start

        # Count tokens by re-encoding the generated text with the tokenizer.
        tokens = _count_tokens(tokenizer, response)
        total_tokens += tokens
        total_time += gen_time
        peak_rss_gb = max(peak_rss_gb, _process_rss_gb())

        tokens_per_sec = tokens / gen_time if gen_time > 0 else 0
        logger.info(" ⏱️ Time: %.2fs", gen_time)
        logger.info(" 📈 Speed: %.1f tokens/sec", tokens_per_sec)
        logger.info(" 📝 Response: %s...", response[:100])

    # Summary
    avg_speed = total_tokens / total_time if total_time > 0 else 0
    logger.info("\n📊 Summary:")
    logger.info(" Total tokens generated: %.0f", total_tokens)
    logger.info(" Total time: %.2fs", total_time)
    logger.info(" Average speed: %.1f tokens/sec", avg_speed)
    logger.info(" Peak process RSS: %.1fGB (sampled)", peak_rss_gb)
# Script entry point: run the benchmark only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    monitor_performance()