|
|
|
""" |
|
Performance monitor for GPT-OSS-120B |
|
""" |
|
|
|
import time |
|
from mlx_lm import load, generate |
|
import psutil |
|
import logging |
|
|
|
# Configure root logging once at import time: timestamped, leveled messages.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger, named after this module per stdlib convention.
logger = logging.getLogger(__name__)
|
|
|
def monitor_performance():
    """Benchmark GPT-OSS-120B load time and generation throughput.

    Loads the MLX 4-bit model, runs a fixed set of short prompts, and logs
    per-prompt latency/speed plus an aggregate summary including the
    process's measured resident memory.

    Returns:
        None. Results are emitted via the module logger.
    """
    logger.info("GPT-OSS-120B Performance Monitor")
    logger.info("=" * 50)

    # Snapshot available RAM before the (very large) model load.
    ram = psutil.virtual_memory()
    logger.info("System RAM: %.1fGB available", ram.available / (1024 ** 3))

    load_start = time.time()
    model, tokenizer = load("mlx-community/gpt-oss-120b-MXFP4-Q4")
    load_time = time.time() - load_start
    logger.info("Model load time: %.2fs", load_time)

    test_prompts = [
        "Hello, how are you?",
        "Explain machine learning",
        "What is the meaning of life?",
        "Write a haiku about technology",
        "Describe quantum physics",
    ]

    total_tokens = 0
    total_time = 0.0

    for i, prompt in enumerate(test_prompts, start=1):
        logger.info("Test %d: %s", i, prompt)

        gen_start = time.time()
        response = generate(
            model, tokenizer,
            prompt=prompt,
            max_tokens=50,
            verbose=False,
        )
        gen_time = time.time() - gen_start

        # Exact token count via the tokenizer, replacing the old
        # word-count * 1.3 heuristic.
        tokens = len(tokenizer.encode(response))
        total_tokens += tokens
        total_time += gen_time

        # Guard against a zero-duration generation (clock granularity).
        tokens_per_sec = tokens / gen_time if gen_time > 0 else 0
        logger.info("  Time: %.2fs", gen_time)
        logger.info("  Speed: %.1f tokens/sec", tokens_per_sec)
        logger.info("  Response: %s...", response[:100])

    avg_speed = total_tokens / total_time if total_time > 0 else 0
    # Measure actual resident set size instead of the previous
    # hard-coded "~62GB (estimated)" figure.
    rss_gb = psutil.Process().memory_info().rss / (1024 ** 3)
    logger.info("Summary:")
    logger.info("  Total tokens generated: %d", total_tokens)
    logger.info("  Total time: %.2fs", total_time)
    logger.info("  Average speed: %.1f tokens/sec", avg_speed)
    logger.info("  Current RAM usage: %.1fGB (process RSS)", rss_gb)
|
|
if __name__ == "__main__": |
|
monitor_performance() |