{"model": "o1-2024-12-17", "score": 60.0}
{"model": "o3-mini-2025-01-31", "score": 42.8}
{"model": "deepseek-r1", "score": 28.7}
{"model": "o1-mini-2024-09-12", "score": 18.8}
{"model": "gemini-2.0-flash-thinking-exp-01-21", "score": 15.2}
{"model": "qwen2.5-max", "score": 13.8}
{"model": "llama-3.1-405b-instruct", "score": 13.2}