diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -83,13 +83,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ak", @@ -342,6 +335,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"as", @@ -384,6 +384,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"awa", @@ -426,6 +433,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"az", @@ -510,6 +524,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bho", @@ -592,7 +613,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { "model":"amazon\/nova-micro-v1", @@ -671,6 +692,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ceb", @@ -755,13 +783,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"amazon\/nova-micro-v1", "bcp_47":"cs", @@ -809,7 +830,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { "model":"amazon\/nova-micro-v1", @@ -853,13 +874,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"amazon\/nova-micro-v1", "bcp_47":"el", @@ -914,7 +928,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"amazon\/nova-micro-v1", @@ -1019,7 +1033,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { "model":"amazon\/nova-micro-v1", @@ -1110,7 +1124,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"amazon\/nova-micro-v1", @@ -1189,6 +1203,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"gu", @@ -1299,7 +1320,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"amazon\/nova-micro-v1", @@ -1460,7 +1481,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"amazon\/nova-micro-v1", @@ -1509,7 +1530,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { "model":"amazon\/nova-micro-v1", @@ -1600,7 +1621,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { "model":"amazon\/nova-micro-v1", @@ -1693,6 +1714,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"jv", @@ -1812,6 +1840,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"km", @@ -1854,6 +1889,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kn", @@ -1901,7 +1943,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { "model":"amazon\/nova-micro-v1", @@ -2015,6 +2057,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mai", @@ -2062,7 +2111,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { "model":"amazon\/nova-micro-v1", @@ -2106,6 +2155,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ml", @@ -2148,6 +2204,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mr", @@ -2239,6 +2302,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"my", @@ -2335,7 +2405,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"amazon\/nova-micro-v1", @@ -2384,7 +2454,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { "model":"amazon\/nova-micro-v1", @@ -2433,7 +2503,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { "model":"amazon\/nova-micro-v1", @@ -2477,6 +2547,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"or", @@ -2519,6 +2596,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pa", @@ -2566,7 +2650,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"amazon\/nova-micro-v1", @@ -2603,6 +2687,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pt", @@ -2622,7 +2713,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"amazon\/nova-micro-v1", @@ -2771,13 +2862,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"amazon\/nova-micro-v1", "bcp_47":"rw", @@ -2820,6 +2904,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sd", @@ -2911,13 +3002,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sn", @@ -3058,6 +3142,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"su", @@ -3100,13 +3191,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sv", @@ -3161,7 +3245,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"amazon\/nova-micro-v1", @@ -3205,6 +3289,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ta", @@ -3252,7 +3343,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { "model":"amazon\/nova-micro-v1", @@ -3331,6 +3422,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"th", @@ -3548,6 +3646,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ur", @@ -3590,6 +3695,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uz", @@ -3637,7 +3749,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { "model":"amazon\/nova-micro-v1", @@ -3681,13 +3793,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"amazon\/nova-micro-v1", "bcp_47":"wo", @@ -3765,13 +3870,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"amazon\/nova-micro-v1", "bcp_47":"xh", @@ -3870,6 +3968,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yue", @@ -3924,7 +4029,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"amazon\/nova-micro-v1", @@ -3975,13 +4080,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zu", @@ -4094,13 +4192,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ak", @@ -4155,7 +4246,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -4239,7 +4330,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -4353,6 +4444,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"as", @@ -4395,6 +4493,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"awa", @@ -4437,6 +4542,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"az", @@ -4521,6 +4633,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bho", @@ -4603,7 +4722,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -4682,6 +4801,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ceb", @@ -4766,13 +4892,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"cs", @@ -4820,7 +4939,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -4864,13 +4983,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"el", @@ -4925,7 +5037,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -4981,7 +5093,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -5200,6 +5312,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"gu", @@ -5254,7 +5373,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -5471,7 +5590,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -5520,7 +5639,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -5611,7 +5730,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -5704,6 +5823,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"jv", @@ -5823,6 +5949,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"km", @@ -5865,6 +5998,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kn", @@ -6026,6 +6166,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mai", @@ -6073,7 +6220,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -6117,6 +6264,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ml", @@ -6159,6 +6313,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mr", @@ -6206,7 +6367,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -6250,6 +6411,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"my", @@ -6395,7 +6563,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -6444,7 +6612,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -6488,6 +6656,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"or", @@ -6530,6 +6705,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pa", @@ -6577,7 +6759,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -6614,6 +6796,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pt", @@ -6633,7 +6822,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -6689,7 +6878,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -6782,13 +6971,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"rw", @@ -6831,6 +7013,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sd", @@ -6878,7 +7067,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -6922,13 +7111,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sn", @@ -7069,6 +7251,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"su", @@ -7111,13 +7300,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sv", @@ -7172,7 +7354,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -7216,6 +7398,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ta", @@ -7342,6 +7531,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"th", @@ -7431,7 +7627,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -7559,6 +7755,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ur", @@ -7601,6 +7804,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uz", @@ -7692,13 +7902,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"wo", @@ -7776,13 +7979,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"xh", @@ -7837,7 +8033,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", @@ -7881,6 +8077,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yue", @@ -7986,13 +8189,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zu", @@ -8105,13 +8301,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ak", @@ -8250,7 +8439,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -8364,6 +8553,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"as", @@ -8406,6 +8602,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"awa", @@ -8448,6 +8651,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"az", @@ -8532,6 +8742,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bho", @@ -8614,7 +8831,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -8693,6 +8910,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ceb", @@ -8777,13 +9001,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"cs", @@ -8831,7 +9048,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -8875,13 +9092,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"el", @@ -8992,7 +9202,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -9211,6 +9421,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"gu", @@ -9265,7 +9482,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -9531,7 +9748,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -9622,7 +9839,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -9671,7 +9888,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -9715,6 +9932,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"jv", @@ -9834,6 +10058,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"km", @@ -9876,6 +10107,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kn", @@ -9923,7 +10161,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -10037,6 +10275,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mai", @@ -10084,7 +10329,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -10128,6 +10373,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ml", @@ -10170,6 +10422,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mr", @@ -10217,7 +10476,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -10261,6 +10520,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"my", @@ -10308,7 +10574,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -10406,7 +10672,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -10499,6 +10765,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"or", @@ -10541,6 +10814,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pa", @@ -10625,6 +10905,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pt", @@ -10644,7 +10931,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -10700,7 +10987,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -10749,7 +11036,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -10793,13 +11080,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"rw", @@ -10842,6 +11122,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sd", @@ -10889,7 +11176,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -10933,13 +11220,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sn", @@ -11036,7 +11316,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -11080,6 +11360,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"su", @@ -11122,13 +11409,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sv", @@ -11227,6 +11507,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ta", @@ -11353,6 +11640,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"th", @@ -11570,6 +11864,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ur", @@ -11612,6 +11913,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uz", @@ -11703,13 +12011,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"wo", @@ -11787,13 +12088,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"xh", @@ -11848,7 +12142,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -11892,6 +12186,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yue", @@ -11946,7 +12247,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", @@ -11997,13 +12298,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zu", @@ -12116,13 +12410,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ak", @@ -12177,7 +12464,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", @@ -12261,7 +12548,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -12375,6 +12662,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"as", @@ -12417,6 +12711,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"awa", @@ -12459,6 +12760,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"az", @@ -12543,6 +12851,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bho", @@ -12704,6 +13019,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ceb", @@ -12788,13 +13110,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"cs", @@ -12842,7 +13157,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -12886,13 +13201,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"el", @@ -12947,7 +13255,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -13003,7 +13311,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -13052,7 +13360,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -13222,6 +13530,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"gu", @@ -13276,7 +13591,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", @@ -13332,7 +13647,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", @@ -13633,7 +13948,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -13682,7 +13997,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -13726,6 +14041,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"jv", @@ -13845,6 +14167,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"km", @@ -13887,6 +14216,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kn", @@ -13934,7 +14270,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -14048,6 +14384,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mai", @@ -14139,6 +14482,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ml", @@ -14181,6 +14531,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mr", @@ -14228,7 +14585,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", @@ -14272,6 +14629,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"my", @@ -14319,7 +14683,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", @@ -14368,7 +14732,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -14466,7 +14830,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -14510,6 +14874,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"or", @@ -14552,6 +14923,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pa", @@ -14599,7 +14977,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -14636,6 +15014,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pt", @@ -14655,7 +15040,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -14711,7 +15096,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", @@ -14760,7 +15145,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -14804,13 +15189,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"rw", @@ -14853,6 +15231,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sd", @@ -14944,13 +15329,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sn", @@ -14998,7 +15376,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", @@ -15091,6 +15469,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"su", @@ -15133,13 +15518,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sv", @@ -15194,7 +15572,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", @@ -15238,6 +15616,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ta", @@ -15285,7 +15670,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", @@ -15364,6 +15749,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"th", @@ -15453,7 +15845,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -15502,7 +15894,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -15581,6 +15973,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ur", @@ -15623,6 +16022,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uz", @@ -15670,7 +16076,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", @@ -15714,13 +16120,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"wo", @@ -15798,13 +16197,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"xh", @@ -15859,7 +16251,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", @@ -15903,6 +16295,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yue", @@ -15957,7 +16356,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", @@ -16008,13 +16407,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zu", @@ -16127,13 +16519,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ak", @@ -16272,7 +16657,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"deepseek\/deepseek-chat", @@ -16386,6 +16771,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"as", @@ -16428,6 +16820,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"awa", @@ -16470,6 +16869,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"az", @@ -16554,6 +16960,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bho", @@ -16636,7 +17049,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -16715,6 +17128,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ceb", @@ -16799,13 +17219,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"deepseek\/deepseek-chat", "bcp_47":"cs", @@ -16853,7 +17266,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -16897,13 +17310,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"deepseek\/deepseek-chat", "bcp_47":"el", @@ -16958,7 +17364,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -17063,7 +17469,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"deepseek\/deepseek-chat", @@ -17105,7 +17511,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -17154,7 +17560,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -17233,6 +17639,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"gu", @@ -17287,7 +17700,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -17343,7 +17756,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { "model":"deepseek\/deepseek-chat", @@ -17504,7 +17917,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -17553,7 +17966,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"deepseek\/deepseek-chat", @@ -17644,7 +18057,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -17737,6 +18150,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"jv", @@ -17856,6 +18276,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"km", @@ -17898,6 +18325,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kn", @@ -17945,7 +18379,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"deepseek\/deepseek-chat", @@ -18059,6 +18493,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mai", @@ -18106,7 +18547,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { "model":"deepseek\/deepseek-chat", @@ -18150,6 +18591,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ml", @@ -18192,6 +18640,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mr", @@ -18239,7 +18694,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -18283,6 +18738,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"my", @@ -18330,7 +18792,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"deepseek\/deepseek-chat", @@ -18379,7 +18841,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"deepseek\/deepseek-chat", @@ -18477,7 +18939,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"deepseek\/deepseek-chat", @@ -18521,6 +18983,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"or", @@ -18563,6 +19032,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pa", @@ -18610,7 +19086,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -18647,6 +19123,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pt", @@ -18666,7 +19149,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"deepseek\/deepseek-chat", @@ -18771,7 +19254,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"deepseek\/deepseek-chat", @@ -18815,13 +19298,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"deepseek\/deepseek-chat", "bcp_47":"rw", @@ -18864,6 +19340,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sd", @@ -18911,7 +19394,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -18955,13 +19438,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sn", @@ -19009,7 +19485,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"deepseek\/deepseek-chat", @@ -19102,6 +19578,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"su", @@ -19144,13 +19627,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sv", @@ -19249,6 +19725,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ta", @@ -19296,7 +19779,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { "model":"deepseek\/deepseek-chat", @@ -19375,6 +19858,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"th", @@ -19464,7 +19954,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -19513,7 +20003,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"deepseek\/deepseek-chat", @@ -19592,6 +20082,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ur", @@ -19634,6 +20131,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uz", @@ -19681,7 +20185,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -19725,13 +20229,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"deepseek\/deepseek-chat", "bcp_47":"wo", @@ -19809,13 +20306,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"deepseek\/deepseek-chat", "bcp_47":"xh", @@ -19870,7 +20360,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { "model":"deepseek\/deepseek-chat", @@ -19914,6 +20404,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yue", @@ -19968,7 +20465,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"deepseek\/deepseek-chat", @@ -20019,13 +20516,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zu", @@ -20138,13 +20628,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ak", @@ -20199,7 +20682,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -20283,7 +20766,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -20397,6 +20880,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"as", @@ -20439,6 +20929,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"awa", @@ -20481,6 +20978,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"az", @@ -20565,6 +21069,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bho", @@ -20647,7 +21158,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -20726,6 +21237,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ceb", @@ -20810,13 +21328,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"cs", @@ -20864,7 +21375,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -20908,13 +21419,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"el", @@ -20969,7 +21473,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -21025,7 +21529,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -21074,7 +21578,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -21116,7 +21620,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -21165,7 +21669,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -21244,6 +21748,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"gu", @@ -21298,7 +21809,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -21354,7 +21865,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -21515,7 +22026,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -21564,7 +22075,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -21704,7 +22215,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -21748,6 +22259,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"jv", @@ -21867,6 +22385,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"km", @@ -21909,6 +22434,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kn", @@ -21956,7 +22488,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -22070,6 +22602,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mai", @@ -22117,7 +22656,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -22161,6 +22700,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ml", @@ -22203,6 +22749,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", @@ -22294,6 +22847,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"my", @@ -22341,7 +22901,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -22390,7 +22950,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -22439,7 +22999,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.2 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -22488,7 +23048,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -22532,6 +23092,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"or", @@ -22574,6 +23141,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", @@ -22621,7 +23195,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -22658,6 +23232,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", @@ -22677,7 +23258,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -22733,7 +23314,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -22782,7 +23363,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -22826,13 +23407,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"rw", @@ -22875,6 +23449,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sd", @@ -22922,7 +23503,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -22966,13 +23547,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sn", @@ -23020,7 +23594,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -23069,7 +23643,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -23113,6 +23687,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"su", @@ -23155,13 +23736,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sv", @@ -23216,7 +23790,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -23260,6 +23834,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ta", @@ -23307,7 +23888,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -23386,6 +23967,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"th", @@ -23475,7 +24063,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -23524,7 +24112,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -23603,6 +24191,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", @@ -23645,6 +24240,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uz", @@ -23692,7 +24294,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -23736,13 +24338,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"wo", @@ -23820,13 +24415,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"xh", @@ -23881,7 +24469,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -23925,6 +24513,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yue", @@ -23979,7 +24574,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.1 }, { "model":"deepseek\/deepseek-chat-v3-0324", @@ -24030,13 +24625,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zu", @@ -24149,13 +24737,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ak", @@ -24408,6 +24989,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"as", @@ -24450,6 +25038,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"awa", @@ -24492,6 +25087,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"az", @@ -24576,6 +25178,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bho", @@ -24737,6 +25346,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ceb", @@ -24821,13 +25437,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1", "bcp_47":"cs", @@ -24919,13 +25528,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1", "bcp_47":"el", @@ -25255,6 +25857,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"gu", @@ -25365,7 +25974,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { "model":"deepseek\/deepseek-r1", @@ -25759,6 +26368,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"jv", @@ -25878,6 +26494,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"km", @@ -25920,6 +26543,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kn", @@ -26081,6 +26711,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mai", @@ -26172,6 +26809,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ml", @@ -26214,6 +26858,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mr", @@ -26305,6 +26956,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"my", @@ -26543,6 +27201,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"or", @@ -26585,6 +27250,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pa", @@ -26669,6 +27341,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pt", @@ -26837,13 +27516,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1", "bcp_47":"rw", @@ -26886,6 +27558,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sd", @@ -26977,13 +27656,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sn", @@ -27124,6 +27796,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"su", @@ -27166,13 +27845,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sv", @@ -27271,6 +27943,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ta", @@ -27397,6 +28076,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"th", @@ -27614,6 +28300,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ur", @@ -27656,6 +28349,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uz", @@ -27747,13 +28447,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1", "bcp_47":"wo", @@ -27831,13 +28524,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1", "bcp_47":"xh", @@ -27936,6 +28622,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yue", @@ -28041,13 +28734,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zu", @@ -28125,13 +28811,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ak", @@ -28384,6 +29063,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"as", @@ -28426,6 +29112,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"awa", @@ -28468,6 +29161,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"az", @@ -28552,6 +29252,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bho", @@ -28643,6 +29350,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ceb", @@ -28727,13 +29441,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"cs", @@ -28825,13 +29532,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"el", @@ -29161,6 +29861,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"gu", @@ -29630,6 +30337,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"jv", @@ -29714,6 +30428,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"km", @@ -29756,6 +30477,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kn", @@ -29917,6 +30645,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mai", @@ -30008,6 +30743,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ml", @@ -30050,6 +30792,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mr", @@ -30141,6 +30890,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"my", @@ -30379,6 +31135,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"or", @@ -30421,6 +31184,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pa", @@ -30505,6 +31275,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pt", @@ -30673,13 +31450,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"rw", @@ -30722,6 +31492,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sd", @@ -30813,13 +31590,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sn", @@ -30960,6 +31730,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"su", @@ -31002,13 +31779,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sv", @@ -31107,6 +31877,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ta", @@ -31198,6 +31975,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"th", @@ -31380,6 +32164,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ur", @@ -31422,6 +32213,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uz", @@ -31513,13 +32311,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"wo", @@ -31597,13 +32388,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"xh", @@ -31702,6 +32486,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yue", @@ -31807,13 +32598,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zu", @@ -31926,13 +32710,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ak", @@ -31987,7 +32764,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -32185,6 +32962,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"as", @@ -32227,6 +33011,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"awa", @@ -32269,6 +33060,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"az", @@ -32353,6 +33151,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bho", @@ -32435,7 +33240,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -32514,6 +33319,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ceb", @@ -32598,13 +33410,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"cs", @@ -32652,7 +33457,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -32696,13 +33501,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"el", @@ -32757,7 +33555,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -32813,7 +33611,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -32862,7 +33660,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -32904,7 +33702,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -32953,7 +33751,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -33032,6 +33830,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"gu", @@ -33086,7 +33891,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", @@ -33303,7 +34108,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -33352,7 +34157,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -33443,7 +34248,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -33492,7 +34297,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", @@ -33536,6 +34341,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", @@ -33655,6 +34467,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"km", @@ -33697,6 +34516,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kn", @@ -33744,7 +34570,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", @@ -33858,6 +34684,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mai", @@ -33905,7 +34738,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -33949,6 +34782,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ml", @@ -33991,6 +34831,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", @@ -34038,7 +34885,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -34082,6 +34929,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"my", @@ -34129,7 +34983,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -34178,7 +35032,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -34227,7 +35081,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -34276,7 +35130,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -34320,6 +35174,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"or", @@ -34362,6 +35223,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", @@ -34409,7 +35277,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -34446,6 +35314,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", @@ -34465,7 +35340,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -34521,7 +35396,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -34570,7 +35445,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -34614,13 +35489,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"rw", @@ -34663,6 +35531,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sd", @@ -34754,13 +35629,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sn", @@ -34808,7 +35676,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -34857,7 +35725,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -34901,6 +35769,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"su", @@ -34943,13 +35818,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sv", @@ -35004,7 +35872,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -35048,6 +35916,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ta", @@ -35095,7 +35970,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -35174,6 +36049,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"th", @@ -35263,7 +36145,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -35312,7 +36194,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -35391,6 +36273,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", @@ -35433,6 +36322,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uz", @@ -35480,7 +36376,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -35524,13 +36420,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"wo", @@ -35608,13 +36497,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"xh", @@ -35669,7 +36551,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -35713,6 +36595,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yue", @@ -35767,7 +36656,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", @@ -35818,13 +36707,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zu", @@ -35937,13 +36819,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ak", @@ -36082,7 +36957,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -36196,6 +37071,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"as", @@ -36238,6 +37120,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"awa", @@ -36280,6 +37169,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"az", @@ -36364,6 +37260,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bho", @@ -36446,7 +37349,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -36525,6 +37428,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ceb", @@ -36609,13 +37519,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"cs", @@ -36663,7 +37566,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -36707,13 +37610,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"el", @@ -36768,7 +37664,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -36824,7 +37720,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -36915,7 +37811,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -36964,7 +37860,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -37043,6 +37939,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"gu", @@ -37314,7 +38217,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -37363,7 +38266,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -37454,7 +38357,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -37503,7 +38406,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -37547,6 +38450,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", @@ -37666,6 +38576,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"km", @@ -37708,6 +38625,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kn", @@ -37755,7 +38679,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -37869,6 +38793,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mai", @@ -37916,7 +38847,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -37960,6 +38891,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ml", @@ -38002,6 +38940,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", @@ -38049,7 +38994,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -38093,6 +39038,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"my", @@ -38140,7 +39092,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -38189,7 +39141,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -38238,7 +39190,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -38287,7 +39239,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -38331,6 +39283,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"or", @@ -38373,6 +39332,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", @@ -38420,7 +39386,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -38457,6 +39423,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", @@ -38532,7 +39505,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -38581,7 +39554,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -38625,13 +39598,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"rw", @@ -38674,6 +39640,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sd", @@ -38721,7 +39694,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -38765,13 +39738,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sn", @@ -38868,7 +39834,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -38912,6 +39878,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"su", @@ -38954,13 +39927,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sv", @@ -39059,6 +40025,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ta", @@ -39106,7 +40079,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -39185,6 +40158,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"th", @@ -39274,7 +40254,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -39323,7 +40303,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -39402,6 +40382,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", @@ -39444,6 +40431,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uz", @@ -39491,7 +40485,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -39535,13 +40529,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"wo", @@ -39619,13 +40606,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"xh", @@ -39680,7 +40660,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -39724,6 +40704,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yue", @@ -39778,7 +40765,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", @@ -39829,13 +40816,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zu", @@ -39913,13 +40893,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ak", @@ -39974,7 +40947,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"google\/gemini-2.5-flash", @@ -40172,6 +41145,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"as", @@ -40214,6 +41194,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"awa", @@ -40256,6 +41243,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"az", @@ -40340,6 +41334,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bho", @@ -40431,6 +41432,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ceb", @@ -40515,13 +41523,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash", "bcp_47":"cs", @@ -40569,7 +41570,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"google\/gemini-2.5-flash", @@ -40613,13 +41614,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash", "bcp_47":"el", @@ -40674,7 +41668,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { "model":"google\/gemini-2.5-flash", @@ -40730,7 +41724,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.5-flash", @@ -40870,7 +41864,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.5-flash", @@ -40949,6 +41943,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"gu", @@ -41059,7 +42060,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.8 }, { "model":"google\/gemini-2.5-flash", @@ -41185,7 +42186,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.5-flash", @@ -41325,7 +42326,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-2.5-flash", @@ -41374,7 +42375,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.5-flash", @@ -41418,6 +42419,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"jv", @@ -41502,6 +42510,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"km", @@ -41544,6 +42559,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kn", @@ -41705,6 +42727,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mai", @@ -41796,6 +42825,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ml", @@ -41838,6 +42874,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mr", @@ -41929,6 +42972,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"my", @@ -41976,7 +43026,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.5-flash", @@ -42025,7 +43075,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"google\/gemini-2.5-flash", @@ -42167,6 +43217,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"or", @@ -42209,6 +43266,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pa", @@ -42256,7 +43320,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"google\/gemini-2.5-flash", @@ -42293,6 +43357,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pt", @@ -42312,7 +43383,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"google\/gemini-2.5-flash", @@ -42417,7 +43488,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"google\/gemini-2.5-flash", @@ -42461,13 +43532,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash", "bcp_47":"rw", @@ -42510,6 +43574,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sd", @@ -42601,13 +43672,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sn", @@ -42704,7 +43768,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"google\/gemini-2.5-flash", @@ -42748,6 +43812,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"su", @@ -42790,13 +43861,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sv", @@ -42851,7 +43915,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { "model":"google\/gemini-2.5-flash", @@ -42895,6 +43959,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ta", @@ -42986,6 +44057,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"th", @@ -43168,6 +44246,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ur", @@ -43210,6 +44295,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uz", @@ -43257,7 +44349,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { "model":"google\/gemini-2.5-flash", @@ -43301,13 +44393,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"google\/gemini-2.5-flash", "bcp_47":"wo", @@ -43385,13 +44470,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash", "bcp_47":"xh", @@ -43446,7 +44524,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-2.5-flash", @@ -43490,6 +44568,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yue", @@ -43544,7 +44629,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { "model":"google\/gemini-2.5-flash", @@ -43595,13 +44680,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zu", @@ -43679,13 +44757,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ak", @@ -43728,13 +44799,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"am", @@ -43812,13 +44876,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ar", @@ -44141,13 +45198,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"bn", @@ -44274,13 +45324,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"cs", @@ -44323,13 +45366,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"de", @@ -44372,13 +45408,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"el", @@ -44421,13 +45450,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"en", @@ -44470,13 +45492,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"es", @@ -44519,13 +45534,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"fa", @@ -44561,13 +45569,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"fil", @@ -44610,13 +45611,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"fr", @@ -44736,13 +45730,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ha", @@ -44785,13 +45772,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"hi", @@ -44911,13 +45891,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"id", @@ -44960,13 +45933,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ig", @@ -45051,13 +46017,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"it", @@ -45100,13 +46059,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ja", @@ -45317,13 +46269,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ko", @@ -45478,13 +46423,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"mg", @@ -45611,13 +46549,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ms", @@ -45702,13 +46633,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ne", @@ -45751,13 +46675,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"nl", @@ -45800,13 +46717,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ny", @@ -45849,13 +46759,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"om", @@ -45982,13 +46885,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"pl", @@ -46038,13 +46934,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"pt", @@ -46094,13 +46983,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ro", @@ -46143,13 +47025,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ru", @@ -46192,13 +47067,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"rw", @@ -46283,13 +47151,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"si", @@ -46332,13 +47193,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"sn", @@ -46381,13 +47235,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"so", @@ -46430,13 +47277,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"sr", @@ -46521,13 +47361,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"sv", @@ -46570,13 +47403,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"sw", @@ -46661,13 +47487,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"te", @@ -46794,13 +47613,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"tr", @@ -46843,13 +47655,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"uk", @@ -46976,13 +47781,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"vi", @@ -47025,13 +47823,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"wo", @@ -47109,13 +47900,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"xh", @@ -47158,13 +47942,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"yo", @@ -47249,13 +48026,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"zh", @@ -47298,13 +48068,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"zu", @@ -47417,13 +48180,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ak", @@ -47466,13 +48222,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"am", @@ -47550,13 +48299,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ar", @@ -47914,13 +48656,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"bn", @@ -48082,13 +48817,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"cs", @@ -48131,13 +48859,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"de", @@ -48180,13 +48901,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"el", @@ -48229,13 +48943,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"en", @@ -48278,13 +48985,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"es", @@ -48327,13 +49027,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"fa", @@ -48369,13 +49062,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"fil", @@ -48418,13 +49104,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"fr", @@ -48544,13 +49223,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ha", @@ -48593,13 +49265,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"hi", @@ -48754,13 +49419,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"id", @@ -48803,13 +49461,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ig", @@ -48894,13 +49545,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"it", @@ -48943,13 +49587,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ja", @@ -49195,13 +49832,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ko", @@ -49356,13 +49986,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"mg", @@ -49489,13 +50112,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ms", @@ -49580,13 +50196,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ne", @@ -49629,13 +50238,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"nl", @@ -49678,13 +50280,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ny", @@ -49727,13 +50322,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"om", @@ -49860,13 +50448,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pl", @@ -49916,13 +50497,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pt", @@ -49972,13 +50546,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ro", @@ -50021,13 +50588,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ru", @@ -50070,13 +50630,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"rw", @@ -50161,13 +50714,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"si", @@ -50210,13 +50756,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"sn", @@ -50259,13 +50798,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"so", @@ -50308,13 +50840,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"sr", @@ -50399,13 +50924,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"sv", @@ -50448,13 +50966,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"sw", @@ -50539,13 +51050,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"te", @@ -50707,13 +51211,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"tr", @@ -50756,13 +51253,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"uk", @@ -50924,13 +51414,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"vi", @@ -50973,13 +51456,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"wo", @@ -51057,13 +51533,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"xh", @@ -51106,13 +51575,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"yo", @@ -51197,13 +51659,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"zh", @@ -51246,13 +51701,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-preview", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"zu", @@ -51365,13 +51813,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ak", @@ -51414,13 +51855,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"am", @@ -51498,13 +51932,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ar", @@ -51862,13 +52289,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"bn", @@ -52030,13 +52450,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"cs", @@ -52079,13 +52492,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"de", @@ -52128,13 +52534,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"el", @@ -52177,13 +52576,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"en", @@ -52226,13 +52618,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"es", @@ -52275,13 +52660,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"fa", @@ -52317,13 +52695,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"fil", @@ -52366,13 +52737,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"fr", @@ -52492,13 +52856,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ha", @@ -52541,13 +52898,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"hi", @@ -52702,13 +53052,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"id", @@ -52751,13 +53094,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ig", @@ -52842,13 +53178,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"it", @@ -52891,13 +53220,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ja", @@ -53143,13 +53465,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ko", @@ -53304,13 +53619,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"mg", @@ -53437,13 +53745,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ms", @@ -53528,13 +53829,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ne", @@ -53577,13 +53871,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"nl", @@ -53626,13 +53913,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ny", @@ -53675,13 +53955,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"om", @@ -53808,13 +54081,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pl", @@ -53864,13 +54130,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pt", @@ -53920,13 +54179,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ro", @@ -53969,13 +54221,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ru", @@ -54018,13 +54263,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"rw", @@ -54109,13 +54347,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"si", @@ -54158,13 +54389,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"sn", @@ -54207,13 +54431,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"so", @@ -54256,13 +54473,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"sr", @@ -54347,13 +54557,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"sv", @@ -54396,13 +54599,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"sw", @@ -54487,13 +54683,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"te", @@ -54655,13 +54844,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"tr", @@ -54704,13 +54886,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"uk", @@ -54872,13 +55047,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"vi", @@ -54921,13 +55089,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"wo", @@ -55005,13 +55166,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"xh", @@ -55054,13 +55208,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"yo", @@ -55145,13 +55292,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"zh", @@ -55194,13 +55334,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-2.5-flash-preview-05-20", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"zu", @@ -55236,13 +55369,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ar", @@ -55285,13 +55411,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bn", @@ -55334,13 +55453,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"de", @@ -55383,13 +55495,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"en", @@ -55432,13 +55537,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"es", @@ -55474,13 +55572,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fa", @@ -55523,13 +55614,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fr", @@ -55565,13 +55649,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hi", @@ -55607,13 +55684,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"id", @@ -55656,13 +55726,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"it", @@ -55705,13 +55768,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ja", @@ -55782,13 +55838,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ko", @@ -55894,13 +55943,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pt", @@ -55943,13 +55985,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ru", @@ -55992,13 +56027,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sw", @@ -56076,13 +56104,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"te", @@ -56118,13 +56139,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"tr", @@ -56195,13 +56209,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"vi", @@ -56314,13 +56321,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zh", @@ -56356,13 +56356,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"ar", @@ -56405,13 +56398,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"bn", @@ -56454,13 +56440,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"de", @@ -56503,13 +56482,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"en", @@ -56552,13 +56524,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"es", @@ -56594,13 +56559,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"fa", @@ -56643,13 +56601,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"fr", @@ -56685,13 +56636,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"hi", @@ -56727,13 +56671,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"id", @@ -56776,13 +56713,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"it", @@ -56825,13 +56755,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"ja", @@ -56902,13 +56825,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"ko", @@ -57014,13 +56930,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"pt", @@ -57063,13 +56972,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"ru", @@ -57112,13 +57014,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"sw", @@ -57196,13 +57091,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"te", @@ -57238,13 +57126,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"tr", @@ -57315,13 +57196,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"vi", @@ -57434,13 +57308,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview", "bcp_47":"zh", @@ -57553,13 +57420,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ak", @@ -57602,13 +57462,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"am", @@ -57686,13 +57539,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ar", @@ -58050,13 +57896,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"bn", @@ -58218,13 +58057,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"cs", @@ -58267,13 +58099,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"de", @@ -58316,13 +58141,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"el", @@ -58365,13 +58183,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"en", @@ -58414,13 +58225,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"es", @@ -58463,13 +58267,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"fa", @@ -58505,13 +58302,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"fil", @@ -58554,13 +58344,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"fr", @@ -58680,13 +58463,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ha", @@ -58729,13 +58505,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"hi", @@ -58890,13 +58659,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"id", @@ -58939,13 +58701,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ig", @@ -59030,13 +58785,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"it", @@ -59079,13 +58827,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ja", @@ -59331,13 +59072,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ko", @@ -59492,13 +59226,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"mg", @@ -59625,13 +59352,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ms", @@ -59716,13 +59436,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ne", @@ -59765,13 +59478,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"nl", @@ -59814,13 +59520,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ny", @@ -59863,13 +59562,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"om", @@ -59996,13 +59688,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"pl", @@ -60052,13 +59737,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"pt", @@ -60108,13 +59786,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ro", @@ -60157,13 +59828,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ru", @@ -60206,13 +59870,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"rw", @@ -60297,13 +59954,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"si", @@ -60346,13 +59996,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"sn", @@ -60395,13 +60038,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"so", @@ -60444,13 +60080,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"sr", @@ -60535,13 +60164,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"sv", @@ -60584,13 +60206,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"sw", @@ -60675,13 +60290,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"te", @@ -60843,13 +60451,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"tr", @@ -60892,13 +60493,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"uk", @@ -61060,13 +60654,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"vi", @@ -61109,13 +60696,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"wo", @@ -61193,13 +60773,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"xh", @@ -61242,13 +60815,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"yo", @@ -61333,13 +60899,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"zh", @@ -61382,13 +60941,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"zu", @@ -61501,13 +61053,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ak", @@ -61562,7 +61107,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { "model":"google\/gemini-flash-1.5", @@ -61646,7 +61191,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { "model":"google\/gemini-flash-1.5", @@ -61760,6 +61305,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"as", @@ -61802,6 +61354,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"awa", @@ -61844,6 +61403,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"az", @@ -61928,6 +61494,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bho", @@ -62010,7 +61583,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -62089,6 +61662,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", @@ -62173,13 +61753,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-flash-1.5", "bcp_47":"cs", @@ -62227,7 +61800,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -62271,13 +61844,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-flash-1.5", "bcp_47":"el", @@ -62332,7 +61898,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -62388,7 +61954,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -62437,7 +62003,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -62479,7 +62045,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -62528,7 +62094,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -62607,6 +62173,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"gu", @@ -62661,7 +62234,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { "model":"google\/gemini-flash-1.5", @@ -62717,7 +62290,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -62927,7 +62500,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { "model":"google\/gemini-flash-1.5", @@ -63018,7 +62591,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -63111,6 +62684,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"jv", @@ -63230,6 +62810,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"km", @@ -63272,6 +62859,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kn", @@ -63319,7 +62913,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"google\/gemini-flash-1.5", @@ -63433,6 +63027,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mai", @@ -63480,7 +63081,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"google\/gemini-flash-1.5", @@ -63524,6 +63125,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ml", @@ -63566,6 +63174,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mr", @@ -63613,7 +63228,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -63657,6 +63272,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"my", @@ -63704,7 +63326,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"google\/gemini-flash-1.5", @@ -63802,7 +63424,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"google\/gemini-flash-1.5", @@ -63895,6 +63517,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"or", @@ -63937,6 +63566,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pa", @@ -63984,7 +63620,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -64021,6 +63657,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pt", @@ -64096,7 +63739,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -64145,7 +63788,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { "model":"google\/gemini-flash-1.5", @@ -64189,13 +63832,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-flash-1.5", "bcp_47":"rw", @@ -64238,6 +63874,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sd", @@ -64329,13 +63972,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sn", @@ -64383,7 +64019,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"google\/gemini-flash-1.5", @@ -64476,6 +64112,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"su", @@ -64518,13 +64161,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sv", @@ -64579,7 +64215,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -64623,6 +64259,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ta", @@ -64670,7 +64313,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { "model":"google\/gemini-flash-1.5", @@ -64749,6 +64392,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"th", @@ -64966,6 +64616,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ur", @@ -65008,6 +64665,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uz", @@ -65055,7 +64719,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"google\/gemini-flash-1.5", @@ -65099,13 +64763,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"google\/gemini-flash-1.5", "bcp_47":"wo", @@ -65183,13 +64840,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"google\/gemini-flash-1.5", "bcp_47":"xh", @@ -65244,7 +64894,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"google\/gemini-flash-1.5", @@ -65288,6 +64938,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yue", @@ -65393,13 +65050,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zu", @@ -65512,13 +65162,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", @@ -65573,7 +65216,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", @@ -65771,6 +65414,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", @@ -65813,6 +65463,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", @@ -65855,6 +65512,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", @@ -65939,6 +65603,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", @@ -66100,6 +65771,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", @@ -66184,13 +65862,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", @@ -66238,7 +65909,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", @@ -66282,13 +65953,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", @@ -66343,7 +66007,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", @@ -66399,7 +66063,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", @@ -66539,7 +66203,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", @@ -66618,6 +66282,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", @@ -66672,7 +66343,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", @@ -66889,7 +66560,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", @@ -66938,7 +66609,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", @@ -67078,7 +66749,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", @@ -67122,6 +66793,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", @@ -67241,6 +66919,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", @@ -67283,6 +66968,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", @@ -67330,7 +67022,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", @@ -67444,6 +67136,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", @@ -67491,7 +67190,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", @@ -67535,6 +67234,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", @@ -67577,6 +67283,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", @@ -67668,6 +67381,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", @@ -67715,7 +67435,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", @@ -67764,7 +67484,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", @@ -67813,7 +67533,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", @@ -67862,7 +67582,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", @@ -67906,6 +67626,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", @@ -67948,6 +67675,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", @@ -67995,7 +67729,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", @@ -68032,6 +67766,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", @@ -68051,7 +67792,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", @@ -68156,7 +67897,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", @@ -68200,13 +67941,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", @@ -68249,6 +67983,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", @@ -68296,7 +68037,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", @@ -68340,13 +68081,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", @@ -68394,7 +68128,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { "model":"google\/gemini-flash-1.5-8b", @@ -68443,7 +68177,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", @@ -68487,6 +68221,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", @@ -68529,13 +68270,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", @@ -68590,7 +68324,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", @@ -68634,6 +68368,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", @@ -68760,6 +68501,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", @@ -68849,7 +68597,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", @@ -68898,7 +68646,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", @@ -68977,6 +68725,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", @@ -69019,6 +68774,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", @@ -69110,13 +68872,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", @@ -69194,13 +68949,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", @@ -69299,6 +69047,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", @@ -69353,7 +69108,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", @@ -69404,13 +69159,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", @@ -69523,13 +69271,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ak", @@ -69584,7 +69325,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { "model":"google\/gemma-3-27b-it", @@ -69668,7 +69409,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { "model":"google\/gemma-3-27b-it", @@ -69782,6 +69523,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"as", @@ -69824,6 +69572,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"awa", @@ -69866,6 +69621,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"az", @@ -69950,6 +69712,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bho", @@ -70111,6 +69880,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ceb", @@ -70195,13 +69971,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"google\/gemma-3-27b-it", "bcp_47":"cs", @@ -70249,7 +70018,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"google\/gemma-3-27b-it", @@ -70293,13 +70062,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"google\/gemma-3-27b-it", "bcp_47":"el", @@ -70354,7 +70116,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { "model":"google\/gemma-3-27b-it", @@ -70410,7 +70172,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { "model":"google\/gemma-3-27b-it", @@ -70501,7 +70263,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { "model":"google\/gemma-3-27b-it", @@ -70550,7 +70312,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { "model":"google\/gemma-3-27b-it", @@ -70629,6 +70391,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"gu", @@ -70683,7 +70452,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { "model":"google\/gemma-3-27b-it", @@ -70739,7 +70508,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"google\/gemma-3-27b-it", @@ -70900,7 +70669,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { "model":"google\/gemma-3-27b-it", @@ -70949,7 +70718,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { "model":"google\/gemma-3-27b-it", @@ -71040,7 +70809,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.6 }, { "model":"google\/gemma-3-27b-it", @@ -71089,7 +70858,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { "model":"google\/gemma-3-27b-it", @@ -71133,6 +70902,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"jv", @@ -71252,6 +71028,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"km", @@ -71294,6 +71077,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kn", @@ -71341,7 +71131,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { "model":"google\/gemma-3-27b-it", @@ -71455,6 +71245,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mai", @@ -71502,7 +71299,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { "model":"google\/gemma-3-27b-it", @@ -71546,6 +71343,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ml", @@ -71588,6 +71392,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mr", @@ -71635,7 +71446,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { "model":"google\/gemma-3-27b-it", @@ -71679,6 +71490,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"my", @@ -71775,7 +71593,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { "model":"google\/gemma-3-27b-it", @@ -71824,7 +71642,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { "model":"google\/gemma-3-27b-it", @@ -71917,6 +71735,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"or", @@ -71959,6 +71784,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pa", @@ -72006,7 +71838,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.7 }, { "model":"google\/gemma-3-27b-it", @@ -72043,6 +71875,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pt", @@ -72062,7 +71901,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { "model":"google\/gemma-3-27b-it", @@ -72118,7 +71957,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { "model":"google\/gemma-3-27b-it", @@ -72211,13 +72050,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"google\/gemma-3-27b-it", "bcp_47":"rw", @@ -72260,6 +72092,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sd", @@ -72351,13 +72190,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sn", @@ -72405,7 +72237,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"google\/gemma-3-27b-it", @@ -72454,7 +72286,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { "model":"google\/gemma-3-27b-it", @@ -72498,6 +72330,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"su", @@ -72540,13 +72379,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sv", @@ -72601,7 +72433,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.2 }, { "model":"google\/gemma-3-27b-it", @@ -72645,6 +72477,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ta", @@ -72692,7 +72531,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { "model":"google\/gemma-3-27b-it", @@ -72771,6 +72610,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"th", @@ -72860,7 +72706,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { "model":"google\/gemma-3-27b-it", @@ -72909,7 +72755,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { "model":"google\/gemma-3-27b-it", @@ -72988,6 +72834,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ur", @@ -73030,6 +72883,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uz", @@ -73077,7 +72937,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { "model":"google\/gemma-3-27b-it", @@ -73121,13 +72981,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"google\/gemma-3-27b-it", "bcp_47":"wo", @@ -73205,13 +73058,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"google\/gemma-3-27b-it", "bcp_47":"xh", @@ -73310,6 +73156,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yue", @@ -73415,13 +73268,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zu", @@ -75830,13 +75676,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", @@ -75891,7 +75730,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", @@ -75975,7 +75814,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", @@ -76089,6 +75928,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", @@ -76131,6 +75977,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", @@ -76173,6 +76026,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", @@ -76257,6 +76117,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", @@ -76339,7 +76206,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", @@ -76418,6 +76285,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", @@ -76502,13 +76376,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", @@ -76556,7 +76423,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", @@ -76600,13 +76467,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", @@ -76661,7 +76521,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", @@ -76717,7 +76577,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", @@ -76766,7 +76626,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", @@ -76808,7 +76668,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", @@ -76857,7 +76717,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", @@ -76936,6 +76796,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", @@ -77207,7 +77074,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", @@ -77256,7 +77123,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", @@ -77347,7 +77214,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", @@ -77396,7 +77263,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", @@ -77440,6 +77307,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", @@ -77559,6 +77433,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", @@ -77601,6 +77482,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", @@ -77762,6 +77650,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", @@ -77853,6 +77748,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", @@ -77895,6 +77797,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", @@ -77942,7 +77851,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", @@ -77986,6 +77895,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", @@ -78082,7 +77998,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", @@ -78131,7 +78047,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", @@ -78180,7 +78096,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", @@ -78224,6 +78140,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", @@ -78266,6 +78189,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", @@ -78313,7 +78243,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", @@ -78350,6 +78280,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", @@ -78369,7 +78306,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", @@ -78518,13 +78455,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", @@ -78567,6 +78497,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", @@ -78614,7 +78551,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", @@ -78658,13 +78595,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", @@ -78712,7 +78642,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", @@ -78761,7 +78691,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", @@ -78805,6 +78735,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", @@ -78847,13 +78784,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", @@ -78952,6 +78882,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", @@ -78999,7 +78936,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", @@ -79078,6 +79015,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", @@ -79167,7 +79111,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", @@ -79295,6 +79239,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", @@ -79337,6 +79288,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", @@ -79384,7 +79342,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", @@ -79428,13 +79386,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", @@ -79512,13 +79463,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", @@ -79573,7 +79517,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", @@ -79617,6 +79561,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", @@ -79722,13 +79673,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", @@ -79841,13 +79785,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", @@ -79986,7 +79923,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -80100,6 +80037,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", @@ -80142,6 +80086,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", @@ -80184,6 +80135,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", @@ -80268,6 +80226,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", @@ -80350,7 +80315,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -80429,6 +80394,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", @@ -80513,13 +80485,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", @@ -80567,7 +80532,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -80611,13 +80576,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", @@ -80672,7 +80630,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -80728,7 +80686,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -80947,6 +80905,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", @@ -81057,7 +81022,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -81218,7 +81183,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -81358,7 +81323,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -81407,7 +81372,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -81451,6 +81416,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", @@ -81570,6 +81542,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", @@ -81612,6 +81591,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", @@ -81659,7 +81645,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -81773,6 +81759,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", @@ -81864,6 +81857,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", @@ -81906,6 +81906,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", @@ -81953,7 +81960,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -81997,6 +82004,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", @@ -82044,7 +82058,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -82093,7 +82107,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -82142,7 +82156,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -82191,7 +82205,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -82235,6 +82249,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", @@ -82277,6 +82298,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", @@ -82324,7 +82352,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -82361,6 +82389,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", @@ -82380,7 +82415,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -82436,7 +82471,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -82485,7 +82520,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -82529,13 +82564,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", @@ -82578,6 +82606,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", @@ -82669,13 +82704,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", @@ -82723,7 +82751,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -82772,7 +82800,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -82816,6 +82844,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", @@ -82858,13 +82893,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", @@ -82919,7 +82947,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -82963,6 +82991,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", @@ -83010,7 +83045,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -83089,6 +83124,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", @@ -83306,6 +83348,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", @@ -83348,6 +83397,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", @@ -83395,7 +83451,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -83439,13 +83495,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", @@ -83523,13 +83572,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", @@ -83584,7 +83626,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -83628,6 +83670,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", @@ -83682,7 +83731,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", @@ -83733,13 +83782,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", @@ -83852,13 +83894,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", @@ -83913,7 +83948,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -83997,7 +84032,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -84111,6 +84146,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", @@ -84153,6 +84195,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", @@ -84195,6 +84244,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", @@ -84279,6 +84335,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bho", @@ -84361,7 +84424,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -84440,6 +84503,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", @@ -84524,13 +84594,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", @@ -84622,13 +84685,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", @@ -84683,7 +84739,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -84788,7 +84844,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -84830,7 +84886,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -84879,7 +84935,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -84958,6 +85014,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", @@ -85012,7 +85075,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -85068,7 +85131,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -85229,7 +85292,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -85278,7 +85341,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -85369,7 +85432,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -85462,6 +85525,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", @@ -85581,6 +85651,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", @@ -85623,6 +85700,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", @@ -85670,7 +85754,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -85784,6 +85868,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", @@ -85831,7 +85922,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -85875,6 +85966,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", @@ -85917,6 +86015,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", @@ -86008,6 +86113,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", @@ -86055,7 +86167,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -86104,7 +86216,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -86153,7 +86265,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -86246,6 +86358,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", @@ -86288,6 +86407,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", @@ -86372,6 +86498,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", @@ -86391,7 +86524,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -86496,7 +86629,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -86540,13 +86673,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"rw", @@ -86589,6 +86715,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", @@ -86636,7 +86769,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -86680,13 +86813,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", @@ -86734,7 +86860,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -86827,6 +86953,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", @@ -86869,13 +87002,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", @@ -86930,7 +87056,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -86974,6 +87100,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", @@ -87021,7 +87154,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -87100,6 +87233,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", @@ -87189,7 +87329,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -87317,6 +87457,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", @@ -87359,6 +87506,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", @@ -87406,7 +87560,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -87450,13 +87604,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wo", @@ -87534,13 +87681,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"xh", @@ -87595,7 +87735,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -87639,6 +87779,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", @@ -87693,7 +87840,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", @@ -87744,13 +87891,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", @@ -87863,13 +88003,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ak", @@ -87912,13 +88045,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"am", @@ -87989,13 +88115,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ar", @@ -88318,13 +88437,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"bn", @@ -88472,13 +88584,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"cs", @@ -88521,13 +88626,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"de", @@ -88570,13 +88668,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"el", @@ -88619,13 +88710,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"en", @@ -88668,13 +88752,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"es", @@ -88710,13 +88787,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"fa", @@ -88752,13 +88822,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"fil", @@ -88801,13 +88864,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"fr", @@ -88920,13 +88976,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ha", @@ -88962,13 +89011,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"hi", @@ -89116,13 +89158,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"id", @@ -89165,13 +89200,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ig", @@ -89249,13 +89277,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"it", @@ -89298,13 +89319,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ja", @@ -89515,13 +89529,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ko", @@ -89662,13 +89669,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"mg", @@ -89774,13 +89774,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ms", @@ -89851,13 +89844,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ne", @@ -89900,13 +89886,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"nl", @@ -89942,13 +89921,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ny", @@ -89991,13 +89963,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"om", @@ -90110,13 +90075,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"pl", @@ -90152,13 +90110,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"pt", @@ -90201,13 +90152,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ro", @@ -90250,13 +90194,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"ru", @@ -90299,13 +90236,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"rw", @@ -90376,13 +90306,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"si", @@ -90425,13 +90348,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"sn", @@ -90467,13 +90383,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"so", @@ -90509,13 +90418,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"sr", @@ -90593,13 +90495,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"sv", @@ -90642,13 +90537,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"sw", @@ -90726,13 +90614,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"te", @@ -90880,13 +90761,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"tr", @@ -90922,13 +90796,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"uk", @@ -91069,13 +90936,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"vi", @@ -91118,13 +90978,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"wo", @@ -91202,13 +91055,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"xh", @@ -91251,13 +91097,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"yo", @@ -91335,13 +91174,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"zh", @@ -91384,13 +91216,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"meta-llama\/llama-3.1-8b-instruct", "bcp_47":"zu", @@ -91426,13 +91251,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"ar", @@ -91475,13 +91293,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"bn", @@ -91524,13 +91335,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"de", @@ -91573,13 +91377,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"en", @@ -91622,13 +91419,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"es", @@ -91671,13 +91461,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"fr", @@ -91713,13 +91496,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"hi", @@ -91755,13 +91531,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"id", @@ -91804,13 +91573,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"ja", @@ -91951,13 +91713,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"pt", @@ -92000,13 +91755,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"ru", @@ -92049,13 +91797,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"sw", @@ -92098,13 +91839,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"te", @@ -92175,13 +91909,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"vi", @@ -92224,13 +91951,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"meta-llama\/llama-3.2-1b-instruct", "bcp_47":"zh", @@ -92343,13 +92063,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", @@ -92488,7 +92201,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -92602,6 +92315,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", @@ -92644,6 +92364,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", @@ -92686,6 +92413,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", @@ -92770,6 +92504,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", @@ -92852,7 +92593,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -92931,6 +92672,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", @@ -93015,13 +92763,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", @@ -93113,13 +92854,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", @@ -93174,7 +92908,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -93321,7 +93055,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -93370,7 +93104,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -93449,6 +93183,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", @@ -93720,7 +93461,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -93769,7 +93510,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -93860,7 +93601,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -93909,7 +93650,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -93953,6 +93694,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", @@ -94072,6 +93820,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", @@ -94114,6 +93869,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", @@ -94161,7 +93923,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -94275,6 +94037,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", @@ -94322,7 +94091,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -94366,6 +94135,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", @@ -94408,6 +94184,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", @@ -94455,7 +94238,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -94499,6 +94282,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", @@ -94546,7 +94336,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -94595,7 +94385,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -94644,7 +94434,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -94693,7 +94483,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -94737,6 +94527,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", @@ -94779,6 +94576,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", @@ -94863,6 +94667,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", @@ -94882,7 +94693,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -94938,7 +94749,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -94987,7 +94798,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -95031,13 +94842,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", @@ -95080,6 +94884,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", @@ -95127,7 +94938,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -95171,13 +94982,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", @@ -95225,7 +95029,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -95274,7 +95078,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -95318,6 +95122,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", @@ -95360,13 +95171,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", @@ -95465,6 +95269,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", @@ -95591,6 +95402,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", @@ -95680,7 +95498,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -95808,6 +95626,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", @@ -95850,6 +95675,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", @@ -95897,7 +95729,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -95941,13 +95773,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", @@ -96025,13 +95850,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", @@ -96086,7 +95904,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -96130,6 +95948,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", @@ -96184,7 +96009,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", @@ -96235,13 +96060,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", @@ -96354,13 +96172,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", @@ -96415,7 +96226,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -96499,7 +96310,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", @@ -96613,6 +96424,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", @@ -96655,6 +96473,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", @@ -96697,6 +96522,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", @@ -96781,6 +96613,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", @@ -96863,7 +96702,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -96942,6 +96781,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", @@ -97026,13 +96872,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", @@ -97124,13 +96963,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", @@ -97185,7 +97017,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { "model":"meta-llama\/llama-4-maverick", @@ -97290,7 +97122,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", @@ -97381,7 +97213,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", @@ -97460,6 +97292,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", @@ -97514,7 +97353,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", @@ -97731,7 +97570,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -97871,7 +97710,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", @@ -97920,7 +97759,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -97964,6 +97803,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", @@ -98083,6 +97929,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", @@ -98125,6 +97978,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", @@ -98172,7 +98032,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -98286,6 +98146,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", @@ -98333,7 +98200,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", @@ -98377,6 +98244,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", @@ -98419,6 +98293,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", @@ -98466,7 +98347,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -98510,6 +98391,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", @@ -98557,7 +98445,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", @@ -98704,7 +98592,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", @@ -98748,6 +98636,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", @@ -98790,6 +98685,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", @@ -98874,6 +98776,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", @@ -98893,7 +98802,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", @@ -98949,7 +98858,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -99042,13 +98951,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", @@ -99091,6 +98993,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", @@ -99138,7 +99047,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -99182,13 +99091,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", @@ -99236,7 +99138,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", @@ -99285,7 +99187,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -99329,6 +99231,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", @@ -99371,13 +99280,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", @@ -99432,7 +99334,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -99476,6 +99378,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", @@ -99523,7 +99432,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", @@ -99602,6 +99511,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", @@ -99691,7 +99607,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", @@ -99740,7 +99656,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -99819,6 +99735,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", @@ -99861,6 +99784,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", @@ -99908,7 +99838,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -99952,13 +99882,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", @@ -100036,13 +99959,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", @@ -100097,7 +100013,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", @@ -100141,6 +100057,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", @@ -100195,7 +100118,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", @@ -100246,13 +100169,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", @@ -100365,13 +100281,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"microsoft\/phi-4", "bcp_47":"ak", @@ -100426,7 +100335,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { "model":"microsoft\/phi-4", @@ -100624,6 +100533,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4", "bcp_47":"as", @@ -100666,6 +100582,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4", "bcp_47":"awa", @@ -100708,6 +100631,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4", "bcp_47":"az", @@ -100792,6 +100722,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"bho", @@ -100874,7 +100811,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -100953,6 +100890,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"microsoft\/phi-4", "bcp_47":"ceb", @@ -101037,13 +100981,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"microsoft\/phi-4", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"microsoft\/phi-4", "bcp_47":"cs", @@ -101091,7 +101028,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -101135,13 +101072,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"microsoft\/phi-4", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"microsoft\/phi-4", "bcp_47":"el", @@ -101252,7 +101182,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -101301,7 +101231,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { "model":"microsoft\/phi-4", @@ -101343,7 +101273,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -101471,6 +101401,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"gu", @@ -101525,7 +101462,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { "model":"microsoft\/phi-4", @@ -101581,7 +101518,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { "model":"microsoft\/phi-4", @@ -101742,7 +101679,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"microsoft\/phi-4", @@ -101791,7 +101728,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { "model":"microsoft\/phi-4", @@ -101882,7 +101819,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -101931,7 +101868,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -101975,6 +101912,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4", "bcp_47":"jv", @@ -102094,6 +102038,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4", "bcp_47":"km", @@ -102136,6 +102087,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4", "bcp_47":"kn", @@ -102183,7 +102141,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"microsoft\/phi-4", @@ -102297,6 +102255,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"microsoft\/phi-4", "bcp_47":"mai", @@ -102388,6 +102353,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"ml", @@ -102430,6 +102402,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"microsoft\/phi-4", "bcp_47":"mr", @@ -102521,6 +102500,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"microsoft\/phi-4", "bcp_47":"my", @@ -102617,7 +102603,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -102666,7 +102652,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { "model":"microsoft\/phi-4", @@ -102715,7 +102701,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { "model":"microsoft\/phi-4", @@ -102759,6 +102745,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4", "bcp_47":"or", @@ -102801,6 +102794,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"pa", @@ -102848,7 +102848,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -102885,6 +102885,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4", "bcp_47":"pt", @@ -102904,7 +102911,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -103009,7 +103016,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { "model":"microsoft\/phi-4", @@ -103053,13 +103060,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"microsoft\/phi-4", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"microsoft\/phi-4", "bcp_47":"rw", @@ -103102,6 +103102,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"microsoft\/phi-4", "bcp_47":"sd", @@ -103149,7 +103156,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { "model":"microsoft\/phi-4", @@ -103193,13 +103200,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"microsoft\/phi-4", "bcp_47":"sn", @@ -103247,7 +103247,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.1 }, { "model":"microsoft\/phi-4", @@ -103296,7 +103296,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { "model":"microsoft\/phi-4", @@ -103340,6 +103340,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"microsoft\/phi-4", "bcp_47":"su", @@ -103382,13 +103389,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"microsoft\/phi-4", "bcp_47":"sv", @@ -103443,7 +103443,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { "model":"microsoft\/phi-4", @@ -103487,6 +103487,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"ta", @@ -103534,7 +103541,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { "model":"microsoft\/phi-4", @@ -103613,6 +103620,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"th", @@ -103702,7 +103716,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -103751,7 +103765,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -103830,6 +103844,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"ur", @@ -103872,6 +103893,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"microsoft\/phi-4", "bcp_47":"uz", @@ -103919,7 +103947,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -103963,13 +103991,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"microsoft\/phi-4", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"microsoft\/phi-4", "bcp_47":"wo", @@ -104047,13 +104068,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"microsoft\/phi-4", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"microsoft\/phi-4", "bcp_47":"xh", @@ -104108,7 +104122,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { "model":"microsoft\/phi-4", @@ -104152,6 +104166,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"yue", @@ -104206,7 +104227,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"microsoft\/phi-4", @@ -104257,13 +104278,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"microsoft\/phi-4", "bcp_47":"zu", @@ -104376,13 +104390,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", @@ -104437,7 +104444,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -104521,7 +104528,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -104635,6 +104642,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", @@ -104677,6 +104691,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", @@ -104719,6 +104740,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", @@ -104803,6 +104831,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", @@ -104885,7 +104920,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -104964,6 +104999,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", @@ -105048,13 +105090,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", @@ -105102,7 +105137,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -105146,13 +105181,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", @@ -105207,7 +105235,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -105263,7 +105291,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -105312,7 +105340,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -105354,7 +105382,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -105482,6 +105510,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", @@ -105536,7 +105571,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -105592,7 +105627,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -105802,7 +105837,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -105893,7 +105928,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -105942,7 +105977,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -105986,6 +106021,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", @@ -106105,6 +106147,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", @@ -106147,6 +106196,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", @@ -106194,7 +106250,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -106308,6 +106364,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", @@ -106355,7 +106418,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -106399,6 +106462,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", @@ -106441,6 +106511,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", @@ -106488,7 +106565,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -106532,6 +106609,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", @@ -106677,7 +106761,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -106726,7 +106810,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -106770,6 +106854,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", @@ -106812,6 +106903,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", @@ -106859,7 +106957,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -106896,6 +106994,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", @@ -107020,7 +107125,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -107064,13 +107169,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", @@ -107113,6 +107211,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", @@ -107160,7 +107265,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -107204,13 +107309,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sn", @@ -107258,7 +107356,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -107351,6 +107449,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"su", @@ -107393,13 +107498,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sv", @@ -107454,7 +107552,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -107498,6 +107596,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ta", @@ -107545,7 +107650,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -107624,6 +107729,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"th", @@ -107713,7 +107825,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -107841,6 +107953,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ur", @@ -107883,6 +108002,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uz", @@ -107930,7 +108056,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -107974,13 +108100,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"wo", @@ -108058,13 +108177,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"xh", @@ -108119,7 +108231,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -108163,6 +108275,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yue", @@ -108217,7 +108336,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", @@ -108268,13 +108387,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zu", @@ -108387,13 +108499,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ak", @@ -108448,7 +108553,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { "model":"mistralai\/mistral-nemo", @@ -108532,7 +108637,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"mistralai\/mistral-nemo", @@ -108646,6 +108751,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"as", @@ -108688,6 +108800,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"awa", @@ -108730,6 +108849,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"az", @@ -108814,6 +108940,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bho", @@ -108896,7 +109029,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"mistralai\/mistral-nemo", @@ -108975,6 +109108,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", @@ -109059,13 +109199,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"mistralai\/mistral-nemo", "bcp_47":"cs", @@ -109113,7 +109246,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"mistralai\/mistral-nemo", @@ -109157,13 +109290,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"mistralai\/mistral-nemo", "bcp_47":"el", @@ -109218,7 +109344,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"mistralai\/mistral-nemo", @@ -109323,7 +109449,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"mistralai\/mistral-nemo", @@ -109365,7 +109491,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { "model":"mistralai\/mistral-nemo", @@ -109493,6 +109619,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"gu", @@ -109603,7 +109736,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"mistralai\/mistral-nemo", @@ -109764,7 +109897,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"mistralai\/mistral-nemo", @@ -109813,7 +109946,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { "model":"mistralai\/mistral-nemo", @@ -109904,7 +110037,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"mistralai\/mistral-nemo", @@ -109953,7 +110086,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"mistralai\/mistral-nemo", @@ -109997,6 +110130,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"jv", @@ -110116,6 +110256,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"km", @@ -110158,6 +110305,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kn", @@ -110205,7 +110359,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"mistralai\/mistral-nemo", @@ -110319,6 +110473,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mai", @@ -110366,7 +110527,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { "model":"mistralai\/mistral-nemo", @@ -110410,6 +110571,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ml", @@ -110452,6 +110620,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mr", @@ -110499,7 +110674,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { "model":"mistralai\/mistral-nemo", @@ -110543,6 +110718,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"my", @@ -110590,7 +110772,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { "model":"mistralai\/mistral-nemo", @@ -110639,7 +110821,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"mistralai\/mistral-nemo", @@ -110688,7 +110870,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { "model":"mistralai\/mistral-nemo", @@ -110737,7 +110919,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.2 }, { "model":"mistralai\/mistral-nemo", @@ -110781,6 +110963,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"or", @@ -110823,6 +111012,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pa", @@ -110870,7 +111066,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"mistralai\/mistral-nemo", @@ -110907,6 +111103,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pt", @@ -111031,7 +111234,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { "model":"mistralai\/mistral-nemo", @@ -111075,13 +111278,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"mistralai\/mistral-nemo", "bcp_47":"rw", @@ -111124,6 +111320,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sd", @@ -111171,7 +111374,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { "model":"mistralai\/mistral-nemo", @@ -111215,13 +111418,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sn", @@ -111269,7 +111465,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { "model":"mistralai\/mistral-nemo", @@ -111318,7 +111514,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"mistralai\/mistral-nemo", @@ -111362,6 +111558,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"su", @@ -111404,13 +111607,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sv", @@ -111465,7 +111661,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { "model":"mistralai\/mistral-nemo", @@ -111509,6 +111705,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ta", @@ -111635,6 +111838,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"th", @@ -111724,7 +111934,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { "model":"mistralai\/mistral-nemo", @@ -111773,7 +111983,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"mistralai\/mistral-nemo", @@ -111852,6 +112062,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ur", @@ -111894,6 +112111,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uz", @@ -111941,7 +112165,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { "model":"mistralai\/mistral-nemo", @@ -111985,13 +112209,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"mistralai\/mistral-nemo", "bcp_47":"wo", @@ -112069,13 +112286,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"mistralai\/mistral-nemo", "bcp_47":"xh", @@ -112130,7 +112340,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { "model":"mistralai\/mistral-nemo", @@ -112174,6 +112384,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yue", @@ -112228,7 +112445,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"mistralai\/mistral-nemo", @@ -112279,13 +112496,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zu", @@ -112398,13 +112608,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"mistralai\/mistral-saba", "bcp_47":"ak", @@ -112459,7 +112662,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { "model":"mistralai\/mistral-saba", @@ -112543,7 +112746,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { "model":"mistralai\/mistral-saba", @@ -112657,6 +112860,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"as", @@ -112699,6 +112909,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"awa", @@ -112741,6 +112958,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"az", @@ -112825,6 +113049,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"bho", @@ -112986,6 +113217,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ceb", @@ -113070,13 +113308,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"mistralai\/mistral-saba", "bcp_47":"cs", @@ -113124,7 +113355,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"mistralai\/mistral-saba", @@ -113168,13 +113399,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"mistralai\/mistral-saba", "bcp_47":"el", @@ -113285,7 +113509,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"mistralai\/mistral-saba", @@ -113334,7 +113558,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"mistralai\/mistral-saba", @@ -113376,7 +113600,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { "model":"mistralai\/mistral-saba", @@ -113504,6 +113728,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"gu", @@ -113558,7 +113789,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"mistralai\/mistral-saba", @@ -113614,7 +113845,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"mistralai\/mistral-saba", @@ -113824,7 +114055,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { "model":"mistralai\/mistral-saba", @@ -113915,7 +114146,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"mistralai\/mistral-saba", @@ -113964,7 +114195,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { "model":"mistralai\/mistral-saba", @@ -114008,6 +114239,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"jv", @@ -114127,6 +114365,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"km", @@ -114169,6 +114414,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"kn", @@ -114216,7 +114468,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { "model":"mistralai\/mistral-saba", @@ -114330,6 +114582,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"mai", @@ -114377,7 +114636,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { "model":"mistralai\/mistral-saba", @@ -114421,6 +114680,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ml", @@ -114463,6 +114729,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"mr", @@ -114554,6 +114827,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"my", @@ -114601,7 +114881,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"mistralai\/mistral-saba", @@ -114650,7 +114930,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { "model":"mistralai\/mistral-saba", @@ -114699,7 +114979,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { "model":"mistralai\/mistral-saba", @@ -114748,7 +115028,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"mistralai\/mistral-saba", @@ -114792,6 +115072,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"or", @@ -114834,6 +115121,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"pa", @@ -114881,7 +115175,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"mistralai\/mistral-saba", @@ -114918,6 +115212,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"pt", @@ -114937,7 +115238,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"mistralai\/mistral-saba", @@ -114993,7 +115294,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"mistralai\/mistral-saba", @@ -115086,13 +115387,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"mistralai\/mistral-saba", "bcp_47":"rw", @@ -115135,6 +115429,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"sd", @@ -115182,7 +115483,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.1 }, { "model":"mistralai\/mistral-saba", @@ -115226,13 +115527,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"mistralai\/mistral-saba", "bcp_47":"sn", @@ -115280,7 +115574,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { "model":"mistralai\/mistral-saba", @@ -115373,6 +115667,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"su", @@ -115415,13 +115716,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"mistralai\/mistral-saba", "bcp_47":"sv", @@ -115476,7 +115770,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"mistralai\/mistral-saba", @@ -115520,6 +115814,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ta", @@ -115567,7 +115868,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { "model":"mistralai\/mistral-saba", @@ -115646,6 +115947,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"th", @@ -115735,7 +116043,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { "model":"mistralai\/mistral-saba", @@ -115784,7 +116092,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"mistralai\/mistral-saba", @@ -115863,6 +116171,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ur", @@ -115905,6 +116220,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"uz", @@ -115952,7 +116274,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"mistralai\/mistral-saba", @@ -115996,13 +116318,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"mistralai\/mistral-saba", "bcp_47":"wo", @@ -116080,13 +116395,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"mistralai\/mistral-saba", "bcp_47":"xh", @@ -116141,7 +116449,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { "model":"mistralai\/mistral-saba", @@ -116185,6 +116493,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"yue", @@ -116239,7 +116554,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"mistralai\/mistral-saba", @@ -116290,13 +116605,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"mistralai\/mistral-saba", "bcp_47":"zu", @@ -116409,13 +116717,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", @@ -116470,7 +116771,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -116554,7 +116855,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -116668,6 +116969,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", @@ -116710,6 +117018,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", @@ -116752,6 +117067,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", @@ -116836,6 +117158,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", @@ -116918,7 +117247,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -116997,6 +117326,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", @@ -117081,13 +117417,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", @@ -117135,7 +117464,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -117179,13 +117508,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", @@ -117296,7 +117618,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -117345,7 +117667,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -117515,6 +117837,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", @@ -117569,7 +117898,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -117786,7 +118115,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -117835,7 +118164,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -117926,7 +118255,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -118019,6 +118348,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", @@ -118138,6 +118474,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", @@ -118180,6 +118523,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", @@ -118227,7 +118577,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -118341,6 +118691,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", @@ -118432,6 +118789,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", @@ -118474,6 +118838,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", @@ -118521,7 +118892,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -118565,6 +118936,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", @@ -118612,7 +118990,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -118661,7 +119039,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -118710,7 +119088,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -118759,7 +119137,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -118803,6 +119181,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", @@ -118845,6 +119230,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", @@ -118892,7 +119284,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -118929,6 +119321,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", @@ -118948,7 +119347,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -119004,7 +119403,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -119053,7 +119452,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -119097,13 +119496,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", @@ -119146,6 +119538,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", @@ -119193,7 +119592,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -119237,13 +119636,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", @@ -119291,7 +119683,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -119340,7 +119732,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -119384,6 +119776,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", @@ -119426,13 +119825,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", @@ -119487,7 +119879,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -119531,6 +119923,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", @@ -119578,7 +119977,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -119657,6 +120056,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", @@ -119874,6 +120280,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", @@ -119916,6 +120329,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", @@ -119963,7 +120383,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -120007,13 +120427,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", @@ -120091,13 +120504,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", @@ -120152,7 +120558,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -120196,6 +120602,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", @@ -120250,7 +120663,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", @@ -120301,13 +120714,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", @@ -120420,13 +120826,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"openai\/gpt-4.1", "bcp_47":"ak", @@ -120469,13 +120868,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1", "bcp_47":"am", @@ -120546,13 +120938,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4.1", "bcp_47":"ar", @@ -120875,13 +121260,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4.1", "bcp_47":"bn", @@ -121029,13 +121407,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"cs", @@ -121078,13 +121449,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"de", @@ -121127,13 +121491,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"el", @@ -121176,13 +121533,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1", "bcp_47":"en", @@ -121225,13 +121575,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"es", @@ -121267,13 +121610,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4.1", "bcp_47":"fa", @@ -121309,13 +121645,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1", "bcp_47":"fil", @@ -121358,13 +121687,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1", "bcp_47":"fr", @@ -121477,13 +121799,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"ha", @@ -121519,13 +121834,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"openai\/gpt-4.1", "bcp_47":"hi", @@ -121673,13 +121981,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4.1", "bcp_47":"id", @@ -121722,13 +122023,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1", "bcp_47":"ig", @@ -121806,13 +122100,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"it", @@ -121855,13 +122142,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1", "bcp_47":"ja", @@ -122072,13 +122352,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"ko", @@ -122219,13 +122492,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1", "bcp_47":"mg", @@ -122331,13 +122597,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4.1", "bcp_47":"ms", @@ -122408,13 +122667,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"ne", @@ -122457,13 +122709,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"openai\/gpt-4.1", "bcp_47":"nl", @@ -122499,13 +122744,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1", "bcp_47":"ny", @@ -122548,13 +122786,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"om", @@ -122667,13 +122898,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"pl", @@ -122709,13 +122933,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"openai\/gpt-4.1", "bcp_47":"pt", @@ -122758,13 +122975,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"openai\/gpt-4.1", "bcp_47":"ro", @@ -122807,13 +123017,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4.1", "bcp_47":"ru", @@ -122856,13 +123059,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"openai\/gpt-4.1", "bcp_47":"rw", @@ -122933,13 +123129,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1", "bcp_47":"si", @@ -122982,13 +123171,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"sn", @@ -123024,13 +123206,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1", "bcp_47":"so", @@ -123066,13 +123241,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"openai\/gpt-4.1", "bcp_47":"sr", @@ -123150,13 +123318,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"sv", @@ -123199,13 +123360,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"sw", @@ -123283,13 +123437,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4.1", "bcp_47":"te", @@ -123437,13 +123584,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4.1", "bcp_47":"tr", @@ -123479,13 +123619,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4.1", "bcp_47":"uk", @@ -123626,13 +123759,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4.1", "bcp_47":"vi", @@ -123675,13 +123801,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"openai\/gpt-4.1", "bcp_47":"wo", @@ -123759,13 +123878,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4.1", "bcp_47":"xh", @@ -123808,13 +123920,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"openai\/gpt-4.1", "bcp_47":"yo", @@ -123892,13 +123997,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, { "model":"openai\/gpt-4.1", "bcp_47":"zh", @@ -123941,13 +124039,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1", "bcp_47":"zu", @@ -124060,13 +124151,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", @@ -124205,7 +124289,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"openai\/gpt-4.1-mini", @@ -124319,6 +124403,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"as", @@ -124361,6 +124452,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", @@ -124403,6 +124501,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"az", @@ -124487,6 +124592,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", @@ -124648,6 +124760,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", @@ -124732,13 +124851,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", @@ -124786,7 +124898,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"openai\/gpt-4.1-mini", @@ -124830,13 +124942,6 @@ "metric":"accuracy", "score":1.0 }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"el", @@ -124891,7 +124996,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"openai\/gpt-4.1-mini", @@ -124996,7 +125101,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"openai\/gpt-4.1-mini", @@ -125087,7 +125192,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"openai\/gpt-4.1-mini", @@ -125166,6 +125271,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", @@ -125276,7 +125388,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { "model":"openai\/gpt-4.1-mini", @@ -125577,7 +125689,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { "model":"openai\/gpt-4.1-mini", @@ -125626,7 +125738,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"openai\/gpt-4.1-mini", @@ -125670,6 +125782,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", @@ -125789,6 +125908,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"km", @@ -125831,6 +125957,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", @@ -125992,6 +126125,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", @@ -126039,7 +126179,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"openai\/gpt-4.1-mini", @@ -126083,6 +126223,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", @@ -126125,6 +126272,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", @@ -126172,7 +126326,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"openai\/gpt-4.1-mini", @@ -126216,6 +126370,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"my", @@ -126312,7 +126473,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"openai\/gpt-4.1-mini", @@ -126361,7 +126522,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { "model":"openai\/gpt-4.1-mini", @@ -126410,7 +126571,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { "model":"openai\/gpt-4.1-mini", @@ -126454,6 +126615,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"or", @@ -126496,6 +126664,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", @@ -126543,7 +126718,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"openai\/gpt-4.1-mini", @@ -126580,6 +126755,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", @@ -126655,7 +126837,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"openai\/gpt-4.1-mini", @@ -126748,13 +126930,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", @@ -126797,6 +126972,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", @@ -126888,13 +127070,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", @@ -126942,7 +127117,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"openai\/gpt-4.1-mini", @@ -127035,6 +127210,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"su", @@ -127077,13 +127259,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", @@ -127138,7 +127313,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { "model":"openai\/gpt-4.1-mini", @@ -127182,6 +127357,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", @@ -127229,7 +127411,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"openai\/gpt-4.1-mini", @@ -127308,6 +127490,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"th", @@ -127397,7 +127586,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"openai\/gpt-4.1-mini", @@ -127446,7 +127635,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { "model":"openai\/gpt-4.1-mini", @@ -127525,6 +127714,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", @@ -127567,6 +127763,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", @@ -127614,7 +127817,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { "model":"openai\/gpt-4.1-mini", @@ -127658,13 +127861,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", @@ -127742,13 +127938,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", @@ -127803,7 +127992,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { "model":"openai\/gpt-4.1-mini", @@ -127847,6 +128036,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", @@ -127901,7 +128097,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"openai\/gpt-4.1-mini", @@ -127952,13 +128148,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", @@ -128071,13 +128260,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", @@ -128132,7 +128314,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"openai\/gpt-4.1-nano", @@ -128216,7 +128398,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"openai\/gpt-4.1-nano", @@ -128330,6 +128512,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"as", @@ -128372,6 +128561,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", @@ -128414,6 +128610,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"az", @@ -128498,6 +128701,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", @@ -128580,7 +128790,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { "model":"openai\/gpt-4.1-nano", @@ -128659,6 +128869,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", @@ -128743,13 +128960,6 @@ "metric":"accuracy", "score":0.6 }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", @@ -128797,7 +129007,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"openai\/gpt-4.1-nano", @@ -128841,13 +129051,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"el", @@ -128902,7 +129105,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"openai\/gpt-4.1-nano", @@ -128958,7 +129161,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { "model":"openai\/gpt-4.1-nano", @@ -129007,7 +129210,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { "model":"openai\/gpt-4.1-nano", @@ -129098,7 +129301,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"openai\/gpt-4.1-nano", @@ -129177,6 +129380,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", @@ -129231,7 +129441,7 @@ "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { "model":"openai\/gpt-4.1-nano", @@ -129497,7 +129707,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { "model":"openai\/gpt-4.1-nano", @@ -129588,7 +129798,7 @@ "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { "model":"openai\/gpt-4.1-nano", @@ -129637,7 +129847,7 @@ "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { "model":"openai\/gpt-4.1-nano", @@ -129681,6 +129891,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", @@ -129800,6 +130017,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"km", @@ -129842,6 +130066,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", @@ -129889,7 +130120,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"openai\/gpt-4.1-nano", @@ -130003,6 +130234,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", @@ -130050,7 +130288,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { "model":"openai\/gpt-4.1-nano", @@ -130094,6 +130332,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", @@ -130136,6 +130381,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", @@ -130183,7 +130435,7 @@ "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"openai\/gpt-4.1-nano", @@ -130227,6 +130479,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"my", @@ -130274,7 +130533,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { "model":"openai\/gpt-4.1-nano", @@ -130372,7 +130631,7 @@ "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { "model":"openai\/gpt-4.1-nano", @@ -130421,7 +130680,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { "model":"openai\/gpt-4.1-nano", @@ -130465,6 +130724,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"or", @@ -130507,6 +130773,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", @@ -130554,7 +130827,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"openai\/gpt-4.1-nano", @@ -130591,6 +130864,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", @@ -130715,7 +130995,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"openai\/gpt-4.1-nano", @@ -130759,13 +131039,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", @@ -130808,6 +131081,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", @@ -130855,7 +131135,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { "model":"openai\/gpt-4.1-nano", @@ -130899,13 +131179,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", @@ -130953,7 +131226,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { "model":"openai\/gpt-4.1-nano", @@ -131002,7 +131275,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"openai\/gpt-4.1-nano", @@ -131046,6 +131319,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"su", @@ -131088,13 +131368,6 @@ "metric":"accuracy", "score":0.8 }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", @@ -131149,7 +131422,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { "model":"openai\/gpt-4.1-nano", @@ -131193,6 +131466,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", @@ -131240,7 +131520,7 @@ "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"openai\/gpt-4.1-nano", @@ -131319,6 +131599,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"th", @@ -131408,7 +131695,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { "model":"openai\/gpt-4.1-nano", @@ -131536,6 +131823,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", @@ -131578,6 +131872,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", @@ -131669,13 +131970,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", @@ -131753,13 +132047,6 @@ "metric":"accuracy", "score":0.4 }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", @@ -131814,7 +132101,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { "model":"openai\/gpt-4.1-nano", @@ -131858,6 +132145,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", @@ -131912,7 +132206,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"openai\/gpt-4.1-nano", @@ -131963,13 +132257,6 @@ "metric":"accuracy", "score":0.2 }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", @@ -132082,13 +132369,6 @@ "metric":"accuracy", "score":0.1 }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ak", @@ -132143,7 +132423,7 @@ "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"openai\/gpt-4o-mini", @@ -132227,7 +132507,7 @@ "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -132341,6 +132621,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"as", @@ -132383,6 +132670,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"awa", @@ -132425,6 +132719,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"az", @@ -132509,6 +132810,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bho", @@ -132591,7 +132899,7 @@ "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -132670,6 +132978,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", @@ -132754,13 +133069,6 @@ "metric":"accuracy", "score":0.7 }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, { "model":"openai\/gpt-4o-mini", "bcp_47":"cs", @@ -132808,7 +133116,7 @@ "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { "model":"openai\/gpt-4o-mini", @@ -132852,13 +133160,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, { "model":"openai\/gpt-4o-mini", "bcp_47":"el", @@ -132913,7 +133214,7 @@ "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -132969,7 +133270,7 @@ "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -133018,7 +133319,7 @@ "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -133060,7 +133361,7 @@ "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -133109,7 +133410,7 @@ "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"openai\/gpt-4o-mini", @@ -133188,6 +133489,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"gu", @@ -133298,7 +133606,7 @@ "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -133459,7 +133767,7 @@ "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -133508,7 +133816,7 @@ "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { "model":"openai\/gpt-4o-mini", @@ -133692,6 +134000,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"jv", @@ -133811,6 +134126,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"km", @@ -133853,6 +134175,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kn", @@ -133900,7 +134229,7 @@ "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -134014,6 +134343,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mai", @@ -134061,7 +134397,7 @@ "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { "model":"openai\/gpt-4o-mini", @@ -134105,6 +134441,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ml", @@ -134147,6 +134490,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mr", @@ -134238,6 +134588,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"my", @@ -134285,7 +134642,7 @@ "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -134334,7 +134691,7 @@ "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -134432,7 +134789,7 @@ "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { "model":"openai\/gpt-4o-mini", @@ -134476,6 +134833,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"or", @@ -134518,6 +134882,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pa", @@ -134565,7 +134936,7 @@ "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { "model":"openai\/gpt-4o-mini", @@ -134602,6 +134973,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pt", @@ -134621,7 +134999,7 @@ "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -134677,7 +135055,7 @@ "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -134726,7 +135104,7 @@ "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { "model":"openai\/gpt-4o-mini", @@ -134770,13 +135148,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4o-mini", "bcp_47":"rw", @@ -134819,6 +135190,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sd", @@ -134866,7 +135244,7 @@ "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"openai\/gpt-4o-mini", @@ -134910,13 +135288,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sn", @@ -134964,7 +135335,7 @@ "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { "model":"openai\/gpt-4o-mini", @@ -135013,7 +135384,7 @@ "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -135057,6 +135428,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"su", @@ -135099,13 +135477,6 @@ "metric":"accuracy", "score":0.9 }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sv", @@ -135160,7 +135531,7 @@ "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { "model":"openai\/gpt-4o-mini", @@ -135204,6 +135575,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ta", @@ -135330,6 +135708,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"th", @@ -135419,7 +135804,7 @@ "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -135468,7 +135853,7 @@ "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -135547,6 +135932,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ur", @@ -135589,6 +135981,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uz", @@ -135636,7 +136035,7 @@ "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { "model":"openai\/gpt-4o-mini", @@ -135680,13 +136079,6 @@ "metric":"accuracy", "score":0.0 }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, { "model":"openai\/gpt-4o-mini", "bcp_47":"wo", @@ -135764,13 +136156,6 @@ "metric":"accuracy", "score":0.5 }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4o-mini", "bcp_47":"xh", @@ -135825,7 +136210,7 @@ "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { "model":"openai\/gpt-4o-mini", @@ -135869,6 +136254,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yue", @@ -135923,7 +136315,7 @@ "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { "model":"openai\/gpt-4o-mini", @@ -135974,13 +136366,6 @@ "metric":"accuracy", "score":0.3 }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zu",