diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -209,6 +209,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ar", @@ -321,6 +328,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"as", @@ -356,6 +370,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"awa", @@ -391,6 +412,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"az", @@ -426,6 +454,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"be", @@ -461,6 +496,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bho", @@ -615,6 +657,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ceb", @@ -650,6 +699,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ckb", @@ -930,6 +986,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fa", @@ -1098,6 +1161,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"gu", @@ -1182,6 +1252,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hi", @@ -1336,6 +1413,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"id", @@ -1427,6 +1511,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ilo", @@ -1560,6 +1651,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"jv", @@ -1630,6 +1728,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kk", @@ -1665,6 +1770,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"km", @@ -1700,6 +1812,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kn", @@ -1735,6 +1854,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ko", @@ -1847,6 +1973,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mai", @@ -1882,6 +2015,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mg", @@ -1924,6 +2064,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ml", @@ -1959,6 +2106,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mr", @@ -1994,6 +2148,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ms", @@ -2036,6 +2197,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"my", @@ -2071,6 +2239,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ne", @@ -2162,6 +2337,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ny", @@ -2253,6 +2435,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"or", @@ -2288,6 +2477,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pa", @@ -2365,6 +2561,13 @@ "metric":"chrf", "score":0.5129096175 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pt", @@ -2372,6 +2575,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pt", @@ -2407,6 +2617,13 @@ "metric":"chrf", "score":0.632650236 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ro", @@ -2561,6 +2778,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sd", @@ -2596,6 +2820,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"si", @@ -2687,6 +2918,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"so", @@ -2729,6 +2967,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sr", @@ -2771,6 +3016,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"su", @@ -2904,6 +3156,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ta", @@ -3065,6 +3324,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ti", @@ -3100,6 +3366,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tr", @@ -3142,6 +3415,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uk", @@ -3219,6 +3499,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ur", @@ -3254,6 +3541,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uz", @@ -3289,6 +3583,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"vi", @@ -3513,6 +3814,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yue", @@ -3849,6 +4157,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ar", @@ -3961,6 +4276,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"as", @@ -3996,6 +4318,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"awa", @@ -4031,6 +4360,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"az", @@ -4066,6 +4402,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"be", @@ -4101,6 +4444,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bho", @@ -4255,6 +4605,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ceb", @@ -4290,6 +4647,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ckb", @@ -4570,6 +4934,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fa", @@ -4738,6 +5109,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"gu", @@ -4822,6 +5200,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hi", @@ -4976,6 +5361,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"id", @@ -5067,6 +5459,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ilo", @@ -5200,6 +5599,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"jv", @@ -5270,6 +5676,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kk", @@ -5305,6 +5718,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"km", @@ -5340,6 +5760,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kn", @@ -5375,6 +5802,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ko", @@ -5487,6 +5921,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mai", @@ -5522,6 +5963,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mg", @@ -5564,6 +6012,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ml", @@ -5599,6 +6054,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mr", @@ -5634,6 +6096,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ms", @@ -5676,6 +6145,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"my", @@ -5711,6 +6187,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ne", @@ -5802,6 +6285,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ny", @@ -5893,6 +6383,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"or", @@ -5928,6 +6425,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pa", @@ -6005,6 +6509,13 @@ "metric":"chrf", "score":0.5811687089 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pt", @@ -6012,6 +6523,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pt", @@ -6047,6 +6565,13 @@ "metric":"chrf", "score":0.7440224371 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ro", @@ -6201,6 +6726,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sd", @@ -6236,6 +6768,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"si", @@ -6327,6 +6866,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"so", @@ -6369,6 +6915,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sr", @@ -6411,6 +6964,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"su", @@ -6544,6 +7104,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ta", @@ -6705,6 +7272,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ti", @@ -6740,6 +7314,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tr", @@ -6782,6 +7363,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uk", @@ -6859,6 +7447,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ur", @@ -6894,6 +7489,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uz", @@ -6929,6 +7531,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"vi", @@ -7153,6 +7762,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yue", @@ -7489,6 +8105,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ar", @@ -7601,6 +8224,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"as", @@ -7636,6 +8266,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"awa", @@ -7671,6 +8308,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"az", @@ -7706,6 +8350,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"be", @@ -7741,6 +8392,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bho", @@ -7895,6 +8553,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ceb", @@ -7930,6 +8595,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ckb", @@ -8210,6 +8882,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fa", @@ -8378,6 +9057,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"gu", @@ -8462,6 +9148,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hi", @@ -8616,6 +9309,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"id", @@ -8707,6 +9407,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ilo", @@ -8840,6 +9547,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"jv", @@ -8910,6 +9624,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kk", @@ -8945,6 +9666,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"km", @@ -8980,6 +9708,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kn", @@ -9015,6 +9750,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ko", @@ -9127,6 +9869,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mai", @@ -9162,6 +9911,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mg", @@ -9204,6 +9960,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ml", @@ -9239,6 +10002,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mr", @@ -9274,6 +10044,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ms", @@ -9316,6 +10093,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"my", @@ -9351,6 +10135,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ne", @@ -9442,6 +10233,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ny", @@ -9533,6 +10331,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"or", @@ -9568,6 +10373,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pa", @@ -9645,6 +10457,13 @@ "metric":"chrf", "score":0.5826652331 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pt", @@ -9652,6 +10471,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pt", @@ -9687,6 +10513,13 @@ "metric":"chrf", "score":0.7292524118 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ro", @@ -9841,6 +10674,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sd", @@ -9876,6 +10716,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"si", @@ -9967,6 +10814,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"so", @@ -10009,6 +10863,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sr", @@ -10051,6 +10912,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"su", @@ -10184,6 +11052,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ta", @@ -10345,6 +11220,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ti", @@ -10380,6 +11262,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tr", @@ -10422,6 +11311,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uk", @@ -10499,6 +11395,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ur", @@ -10534,6 +11437,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uz", @@ -10569,6 +11479,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"vi", @@ -10793,6 +11710,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yue", @@ -11129,6 +12053,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ar", @@ -11241,6 +12172,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"as", @@ -11276,6 +12214,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"awa", @@ -11311,6 +12256,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"az", @@ -11346,6 +12298,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"be", @@ -11381,6 +12340,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bho", @@ -11535,6 +12501,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ceb", @@ -11570,6 +12543,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ckb", @@ -11850,6 +12830,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fa", @@ -12018,6 +13005,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"gu", @@ -12102,6 +13096,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hi", @@ -12256,6 +13257,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"id", @@ -12347,6 +13355,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ilo", @@ -12480,6 +13495,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"jv", @@ -12550,6 +13572,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kk", @@ -12585,6 +13614,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"km", @@ -12620,6 +13656,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kn", @@ -12655,6 +13698,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ko", @@ -12767,6 +13817,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mai", @@ -12802,6 +13859,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mg", @@ -12844,6 +13908,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ml", @@ -12879,6 +13950,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mr", @@ -12914,6 +13992,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ms", @@ -12956,6 +14041,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"my", @@ -12991,6 +14083,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ne", @@ -13082,6 +14181,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ny", @@ -13173,6 +14279,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"or", @@ -13208,6 +14321,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pa", @@ -13285,6 +14405,13 @@ "metric":"chrf", "score":0.5693090483 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pt", @@ -13292,6 +14419,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pt", @@ -13327,6 +14461,13 @@ "metric":"chrf", "score":0.6981449573 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ro", @@ -13481,6 +14622,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sd", @@ -13516,6 +14664,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"si", @@ -13607,6 +14762,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"so", @@ -13649,6 +14811,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sr", @@ -13691,6 +14860,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"su", @@ -13824,6 +15000,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ta", @@ -13985,6 +15168,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ti", @@ -14020,6 +15210,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tr", @@ -14062,6 +15259,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uk", @@ -14139,6 +15343,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ur", @@ -14174,6 +15385,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uz", @@ -14209,6 +15427,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"vi", @@ -14433,6 +15658,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yue", @@ -14769,6 +16001,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ar", @@ -14881,6 +16120,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"as", @@ -14916,6 +16162,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"awa", @@ -14951,6 +16204,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"az", @@ -14986,6 +16246,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"be", @@ -15021,6 +16288,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bho", @@ -15175,6 +16449,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ceb", @@ -15210,6 +16491,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ckb", @@ -15490,6 +16778,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fa", @@ -15658,6 +16953,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"gu", @@ -15742,6 +17044,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hi", @@ -15896,6 +17205,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"id", @@ -15987,6 +17303,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ilo", @@ -16120,6 +17443,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"jv", @@ -16190,6 +17520,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kk", @@ -16225,6 +17562,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"km", @@ -16260,6 +17604,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kn", @@ -16295,6 +17646,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ko", @@ -16407,6 +17765,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mai", @@ -16442,6 +17807,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mg", @@ -16484,6 +17856,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ml", @@ -16519,6 +17898,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mr", @@ -16554,6 +17940,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ms", @@ -16596,6 +17989,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"my", @@ -16631,6 +18031,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ne", @@ -16722,6 +18129,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ny", @@ -16813,6 +18227,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"or", @@ -16848,6 +18269,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pa", @@ -16925,6 +18353,13 @@ "metric":"chrf", "score":0.5692291394 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pt", @@ -16932,6 +18367,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pt", @@ -16967,6 +18409,13 @@ "metric":"chrf", "score":0.6547073078 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ro", @@ -17121,6 +18570,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sd", @@ -17156,6 +18612,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"si", @@ -17247,6 +18710,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"so", @@ -17289,6 +18759,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sr", @@ -17331,6 +18808,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"su", @@ -17464,6 +18948,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ta", @@ -17625,6 +19116,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ti", @@ -17660,6 +19158,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tr", @@ -17702,6 +19207,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uk", @@ -17779,6 +19291,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ur", @@ -17814,6 +19333,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uz", @@ -17849,6 +19375,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"vi", @@ -18073,6 +19606,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yue", @@ -18409,6 +19949,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", @@ -18521,6 +20068,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"as", @@ -18556,6 +20110,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"awa", @@ -18591,6 +20152,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"az", @@ -18626,6 +20194,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"be", @@ -18661,6 +20236,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bho", @@ -18815,6 +20397,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ceb", @@ -18850,6 +20439,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ckb", @@ -19130,6 +20726,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fa", @@ -19298,6 +20901,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"gu", @@ -19382,6 +20992,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", @@ -19536,6 +21153,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", @@ -19627,6 +21251,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ilo", @@ -19760,6 +21391,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"jv", @@ -19830,6 +21468,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kk", @@ -19865,6 +21510,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"km", @@ -19900,6 +21552,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kn", @@ -19935,6 +21594,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ko", @@ -20047,6 +21713,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mai", @@ -20082,6 +21755,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mg", @@ -20124,6 +21804,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ml", @@ -20159,6 +21846,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", @@ -20194,6 +21888,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ms", @@ -20236,6 +21937,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"my", @@ -20271,6 +21979,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ne", @@ -20362,6 +22077,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ny", @@ -20453,6 +22175,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"or", @@ -20488,6 +22217,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", @@ -20565,6 +22301,13 @@ "metric":"chrf", "score":0.540420297 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", @@ -20572,6 +22315,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", @@ -20607,6 +22357,13 @@ "metric":"chrf", "score":0.6710753294 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ro", @@ -20761,6 +22518,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sd", @@ -20796,6 +22560,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"si", @@ -20887,6 +22658,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"so", @@ -20929,6 +22707,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sr", @@ -20971,6 +22756,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"su", @@ -21104,6 +22896,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ta", @@ -21265,6 +23064,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ti", @@ -21300,6 +23106,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tr", @@ -21342,6 +23155,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uk", @@ -21419,6 +23239,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", @@ -21454,6 +23281,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uz", @@ -21489,6 +23323,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"vi", @@ -21713,6 +23554,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yue", @@ -22049,6 +23897,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ar", @@ -22161,6 +24016,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"as", @@ -22196,6 +24058,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"awa", @@ -22231,6 +24100,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"az", @@ -22266,6 +24142,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"be", @@ -22301,6 +24184,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bho", @@ -22455,6 +24345,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ceb", @@ -22490,6 +24387,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ckb", @@ -22770,6 +24674,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fa", @@ -22938,6 +24849,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"gu", @@ -23022,6 +24940,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hi", @@ -23176,6 +25101,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"id", @@ -23267,6 +25199,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ilo", @@ -23400,6 +25339,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"jv", @@ -23470,6 +25416,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kk", @@ -23505,6 +25458,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"km", @@ -23540,6 +25500,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kn", @@ -23575,6 +25542,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ko", @@ -23687,6 +25661,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mai", @@ -23722,6 +25703,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mg", @@ -23764,6 +25752,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ml", @@ -23799,6 +25794,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mr", @@ -23834,6 +25836,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ms", @@ -23876,6 +25885,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"my", @@ -23911,6 +25927,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ne", @@ -24002,6 +26025,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ny", @@ -24093,6 +26123,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"or", @@ -24128,6 +26165,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pa", @@ -24205,6 +26249,13 @@ "metric":"chrf", "score":0.4560149918 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pt", @@ -24212,6 +26263,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pt", @@ -24247,6 +26305,13 @@ "metric":"chrf", "score":0.649967582 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ro", @@ -24401,6 +26466,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sd", @@ -24436,6 +26508,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"si", @@ -24527,6 +26606,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"so", @@ -24569,6 +26655,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sr", @@ -24611,6 +26704,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"su", @@ -24744,6 +26844,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ta", @@ -24905,6 +27012,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ti", @@ -24940,6 +27054,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tr", @@ -24982,6 +27103,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uk", @@ -25059,6 +27187,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ur", @@ -25094,6 +27229,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uz", @@ -25129,6 +27271,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"vi", @@ -25353,6 +27502,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yue", @@ -27201,6 +29357,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", @@ -27313,6 +29476,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"as", @@ -27348,6 +29518,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"awa", @@ -27383,6 +29560,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"az", @@ -27418,6 +29602,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"be", @@ -27453,6 +29644,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bho", @@ -27607,6 +29805,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ceb", @@ -27642,6 +29847,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ckb", @@ -27922,6 +30134,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fa", @@ -28090,6 +30309,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"gu", @@ -28174,6 +30400,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", @@ -28328,6 +30561,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", @@ -28419,6 +30659,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ilo", @@ -28552,6 +30799,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", @@ -28622,6 +30876,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kk", @@ -28657,6 +30918,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"km", @@ -28692,6 +30960,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kn", @@ -28727,6 +31002,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ko", @@ -28839,6 +31121,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mai", @@ -28874,6 +31163,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mg", @@ -28916,6 +31212,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ml", @@ -28951,6 +31254,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", @@ -28986,6 +31296,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ms", @@ -29028,6 +31345,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"my", @@ -29063,6 +31387,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ne", @@ -29154,6 +31485,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ny", @@ -29245,6 +31583,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"or", @@ -29280,6 +31625,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", @@ -29357,6 +31709,13 @@ "metric":"chrf", "score":0.579529149 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", @@ -29364,6 +31723,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", @@ -29399,6 +31765,13 @@ "metric":"chrf", "score":0.6966172136 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ro", @@ -29553,6 +31926,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sd", @@ -29588,6 +31968,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"si", @@ -29679,6 +32066,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"so", @@ -29721,6 +32115,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sr", @@ -29763,6 +32164,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"su", @@ -29896,6 +32304,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ta", @@ -30057,6 +32472,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ti", @@ -30092,6 +32514,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tr", @@ -30134,6 +32563,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uk", @@ -30211,6 +32647,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", @@ -30246,6 +32689,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uz", @@ -30281,6 +32731,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", @@ -30505,6 +32962,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yue", @@ -30841,6 +33305,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", @@ -30953,6 +33424,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"as", @@ -30988,6 +33466,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"awa", @@ -31023,6 +33508,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"az", @@ -31058,6 +33550,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"be", @@ -31093,6 +33592,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bho", @@ -31247,6 +33753,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ceb", @@ -31282,6 +33795,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ckb", @@ -31562,6 +34082,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fa", @@ -31730,6 +34257,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"gu", @@ -31814,6 +34348,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", @@ -31968,6 +34509,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", @@ -32059,6 +34607,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ilo", @@ -32192,6 +34747,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", @@ -32262,6 +34824,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kk", @@ -32297,6 +34866,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"km", @@ -32332,6 +34908,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kn", @@ -32367,6 +34950,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ko", @@ -32479,6 +35069,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mai", @@ -32514,6 +35111,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mg", @@ -32556,6 +35160,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ml", @@ -32591,6 +35202,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", @@ -32626,6 +35244,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ms", @@ -32668,6 +35293,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"my", @@ -32703,6 +35335,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ne", @@ -32794,6 +35433,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ny", @@ -32885,6 +35531,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"or", @@ -32920,6 +35573,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", @@ -32997,6 +35657,13 @@ "metric":"chrf", "score":0.5847615168 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", @@ -33004,6 +35671,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", @@ -33039,6 +35713,13 @@ "metric":"chrf", "score":0.6588223005 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ro", @@ -33193,6 +35874,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sd", @@ -33228,6 +35916,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"si", @@ -33319,6 +36014,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"so", @@ -33361,6 +36063,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sr", @@ -33403,6 +36112,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"su", @@ -33536,6 +36252,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ta", @@ -33697,6 +36420,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ti", @@ -33732,6 +36462,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tr", @@ -33774,6 +36511,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uk", @@ -33851,6 +36595,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", @@ -33886,6 +36637,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uz", @@ -33921,6 +36679,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", @@ -34145,6 +36910,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yue", @@ -34446,6 +37218,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ar", @@ -34558,6 +37337,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"as", @@ -34593,6 +37379,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"awa", @@ -34628,6 +37421,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"az", @@ -34663,6 +37463,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"be", @@ -34698,6 +37505,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bho", @@ -34782,6 +37596,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ceb", @@ -34817,6 +37638,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ckb", @@ -35097,6 +37925,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fa", @@ -35265,6 +38100,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"gu", @@ -35349,6 +38191,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hi", @@ -35468,6 +38317,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"id", @@ -35559,6 +38415,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ilo", @@ -35692,6 +38555,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"jv", @@ -35727,6 +38597,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kk", @@ -35762,6 +38639,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"km", @@ -35797,6 +38681,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kn", @@ -35832,6 +38723,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ko", @@ -35944,6 +38842,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mai", @@ -35979,6 +38884,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mg", @@ -36021,6 +38933,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ml", @@ -36056,6 +38975,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mr", @@ -36091,6 +39017,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ms", @@ -36133,6 +39066,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"my", @@ -36168,6 +39108,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ne", @@ -36259,6 +39206,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ny", @@ -36350,6 +39304,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"or", @@ -36385,6 +39346,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pa", @@ -36462,6 +39430,13 @@ "metric":"chrf", "score":0.5993345379 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pt", @@ -36469,6 +39444,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pt", @@ -36504,6 +39486,13 @@ "metric":"chrf", "score":0.6755172019 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ro", @@ -36658,6 +39647,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sd", @@ -36693,6 +39689,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"si", @@ -36784,6 +39787,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"so", @@ -36826,6 +39836,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sr", @@ -36868,6 +39885,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"su", @@ -37001,6 +40025,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ta", @@ -37127,6 +40158,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ti", @@ -37162,6 +40200,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"tr", @@ -37204,6 +40249,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uk", @@ -37246,6 +40298,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ur", @@ -37281,6 +40340,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uz", @@ -37316,6 +40382,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"vi", @@ -37540,6 +40613,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yue", @@ -37841,6 +40921,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ar", @@ -37953,6 +41040,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"as", @@ -37988,6 +41082,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"awa", @@ -38023,6 +41124,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"az", @@ -38058,6 +41166,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"be", @@ -38093,6 +41208,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"bho", @@ -38177,6 +41299,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ceb", @@ -38212,6 +41341,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ckb", @@ -38492,6 +41628,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"fa", @@ -38660,6 +41803,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"gu", @@ -38744,6 +41894,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"hi", @@ -38863,6 +42020,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"id", @@ -38954,6 +42118,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ilo", @@ -39087,6 +42258,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"jv", @@ -39122,6 +42300,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"kk", @@ -39157,6 +42342,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"km", @@ -39192,6 +42384,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"kn", @@ -39227,6 +42426,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ko", @@ -39339,6 +42545,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"mai", @@ -39374,6 +42587,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"mg", @@ -39416,6 +42636,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ml", @@ -39451,6 +42678,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"mr", @@ -39486,6 +42720,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ms", @@ -39528,6 +42769,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"my", @@ -39563,6 +42811,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ne", @@ -39654,6 +42909,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ny", @@ -39745,6 +43007,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"or", @@ -39780,6 +43049,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"pa", @@ -39857,6 +43133,13 @@ "metric":"chrf", "score":0.5954877296 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"pt", @@ -39864,6 +43147,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"pt", @@ -39899,6 +43189,13 @@ "metric":"chrf", "score":0.7255352889 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ro", @@ -40053,6 +43350,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"sd", @@ -40088,6 +43392,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"si", @@ -40179,6 +43490,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"so", @@ -40221,6 +43539,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"sr", @@ -40263,6 +43588,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"su", @@ -40396,6 +43728,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ta", @@ -40522,6 +43861,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ti", @@ -40557,6 +43903,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"tr", @@ -40599,6 +43952,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"uk", @@ -40641,6 +44001,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"ur", @@ -40676,6 +44043,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"uz", @@ -40711,6 +44085,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"vi", @@ -40935,6 +44316,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"google\/gemini-2.5-flash-lite-preview-06-17", "bcp_47":"yue", @@ -41271,6 +44659,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ar", @@ -41383,6 +44778,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"as", @@ -41418,6 +44820,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"awa", @@ -41453,6 +44862,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"az", @@ -41488,6 +44904,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"be", @@ -41523,6 +44946,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"bho", @@ -41677,6 +45107,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ceb", @@ -41712,6 +45149,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ckb", @@ -41992,6 +45436,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"fa", @@ -42160,6 +45611,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"gu", @@ -42244,6 +45702,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"hi", @@ -42398,6 +45863,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"id", @@ -42489,6 +45961,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ilo", @@ -42622,6 +46101,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"jv", @@ -42692,6 +46178,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"kk", @@ -42727,6 +46220,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"km", @@ -42762,6 +46262,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"kn", @@ -42797,6 +46304,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ko", @@ -42909,6 +46423,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"mai", @@ -42944,6 +46465,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"mg", @@ -42986,6 +46514,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ml", @@ -43021,6 +46556,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"mr", @@ -43056,6 +46598,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ms", @@ -43098,6 +46647,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"my", @@ -43133,6 +46689,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ne", @@ -43224,6 +46787,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ny", @@ -43315,6 +46885,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"or", @@ -43350,6 +46927,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pa", @@ -43427,6 +47011,13 @@ "metric":"chrf", "score":0.6077722316 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pt", @@ -43434,6 +47025,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"pt", @@ -43469,6 +47067,13 @@ "metric":"chrf", "score":0.7045563325 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ro", @@ -43623,6 +47228,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"sd", @@ -43658,6 +47270,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"si", @@ -43749,6 +47368,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"so", @@ -43791,6 +47417,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"sr", @@ -43833,6 +47466,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"su", @@ -43966,6 +47606,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ta", @@ -44127,6 +47774,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ti", @@ -44162,6 +47816,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"tr", @@ -44204,6 +47865,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"uk", @@ -44281,6 +47949,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"ur", @@ -44316,6 +47991,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"uz", @@ -44351,6 +48033,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"vi", @@ -44575,6 +48264,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-flash-preview", "bcp_47":"yue", @@ -44911,6 +48607,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ar", @@ -45023,6 +48726,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"as", @@ -45058,6 +48768,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"awa", @@ -45093,6 +48810,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"az", @@ -45128,6 +48852,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"be", @@ -45163,6 +48894,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"bho", @@ -45317,6 +49055,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ceb", @@ -45352,6 +49097,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ckb", @@ -45632,6 +49384,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"fa", @@ -45800,6 +49559,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"gu", @@ -45884,6 +49650,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"hi", @@ -46038,6 +49811,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"id", @@ -46129,6 +49909,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ilo", @@ -46262,6 +50049,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"jv", @@ -46332,6 +50126,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"kk", @@ -46367,6 +50168,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"km", @@ -46402,6 +50210,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"kn", @@ -46437,6 +50252,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ko", @@ -46549,6 +50371,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"mai", @@ -46584,6 +50413,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"mg", @@ -46626,6 +50462,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ml", @@ -46661,6 +50504,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"mr", @@ -46696,6 +50546,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ms", @@ -46738,6 +50595,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"my", @@ -46773,6 +50637,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ne", @@ -46864,6 +50735,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ny", @@ -46955,6 +50833,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"or", @@ -46990,6 +50875,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pa", @@ -47067,6 +50959,13 @@ "metric":"chrf", "score":0.5940500139 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pt", @@ -47074,6 +50973,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"pt", @@ -47109,6 +51015,13 @@ "metric":"chrf", "score":0.6891420805 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ro", @@ -47263,6 +51176,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"sd", @@ -47298,6 +51218,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"si", @@ -47389,6 +51316,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"so", @@ -47431,6 +51365,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"sr", @@ -47473,6 +51414,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"su", @@ -47606,6 +51554,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ta", @@ -47767,6 +51722,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ti", @@ -47802,6 +51764,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"tr", @@ -47844,6 +51813,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"uk", @@ -47921,6 +51897,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"ur", @@ -47956,6 +51939,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"uz", @@ -47991,6 +51981,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"vi", @@ -48215,6 +52212,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash-preview-05-20", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-flash-preview-05-20", "bcp_47":"yue", @@ -50791,6 +54795,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ar", @@ -50903,6 +54914,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"as", @@ -50938,6 +54956,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"awa", @@ -50973,6 +54998,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"az", @@ -51008,6 +55040,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"be", @@ -51043,6 +55082,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"bho", @@ -51197,6 +55243,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ceb", @@ -51232,6 +55285,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ckb", @@ -51512,6 +55572,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"fa", @@ -51680,6 +55747,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"gu", @@ -51764,6 +55838,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"hi", @@ -51918,6 +55999,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"id", @@ -52009,6 +56097,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ilo", @@ -52142,6 +56237,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"jv", @@ -52212,6 +56314,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"kk", @@ -52247,6 +56356,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"km", @@ -52282,6 +56398,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"kn", @@ -52317,6 +56440,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ko", @@ -52429,6 +56559,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"mai", @@ -52464,6 +56601,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"mg", @@ -52506,6 +56650,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ml", @@ -52541,6 +56692,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"mr", @@ -52576,6 +56734,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ms", @@ -52618,6 +56783,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"my", @@ -52653,6 +56825,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ne", @@ -52744,6 +56923,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ny", @@ -52835,6 +57021,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"or", @@ -52870,6 +57063,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"pa", @@ -52947,6 +57147,13 @@ "metric":"chrf", "score":0.1925827805 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"pt", @@ -52954,6 +57161,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"pt", @@ -52989,6 +57203,13 @@ "metric":"chrf", "score":0.2615431845 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ro", @@ -53143,6 +57364,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"sd", @@ -53178,6 +57406,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"si", @@ -53269,6 +57504,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"so", @@ -53311,6 +57553,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"sr", @@ -53353,6 +57602,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"su", @@ -53486,6 +57742,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ta", @@ -53647,6 +57910,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ti", @@ -53682,6 +57952,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"tr", @@ -53724,6 +58001,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"uk", @@ -53801,6 +58085,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"ur", @@ -53836,6 +58127,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"uz", @@ -53871,6 +58169,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"vi", @@ -54095,6 +58400,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.5-pro-preview-05-06", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-2.5-pro-preview-05-06", "bcp_47":"yue", @@ -54431,6 +58743,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ar", @@ -54543,6 +58862,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"as", @@ -54578,6 +58904,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"awa", @@ -54613,6 +58946,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"az", @@ -54648,6 +58988,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"be", @@ -54683,6 +59030,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bho", @@ -54837,6 +59191,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", @@ -54872,6 +59233,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", @@ -55152,6 +59520,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fa", @@ -55320,6 +59695,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"gu", @@ -55404,6 +59786,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hi", @@ -55558,6 +59947,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"id", @@ -55649,6 +60045,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", @@ -55782,6 +60185,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"jv", @@ -55852,6 +60262,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kk", @@ -55887,6 +60304,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"km", @@ -55922,6 +60346,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kn", @@ -55957,6 +60388,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ko", @@ -56069,6 +60507,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mai", @@ -56104,6 +60549,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mg", @@ -56146,6 +60598,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ml", @@ -56181,6 +60640,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mr", @@ -56216,6 +60682,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ms", @@ -56258,6 +60731,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"my", @@ -56293,6 +60773,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ne", @@ -56384,6 +60871,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ny", @@ -56475,6 +60969,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"or", @@ -56510,6 +61011,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pa", @@ -56587,6 +61095,13 @@ "metric":"chrf", "score":0.6320824157 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pt", @@ -56594,6 +61109,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pt", @@ -56629,6 +61151,13 @@ "metric":"chrf", "score":0.6844540285 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ro", @@ -56783,6 +61312,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sd", @@ -56818,6 +61354,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"si", @@ -56909,6 +61452,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"so", @@ -56951,6 +61501,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sr", @@ -56993,6 +61550,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"su", @@ -57126,6 +61690,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ta", @@ -57287,6 +61858,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ti", @@ -57322,6 +61900,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tr", @@ -57364,6 +61949,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uk", @@ -57441,6 +62033,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ur", @@ -57476,6 +62075,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uz", @@ -57511,6 +62117,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"vi", @@ -57735,6 +62348,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yue", @@ -58071,6 +62691,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", @@ -58183,6 +62810,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", @@ -58218,6 +62852,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", @@ -58253,6 +62894,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", @@ -58288,6 +62936,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", @@ -58323,6 +62978,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", @@ -58477,6 +63139,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", @@ -58512,6 +63181,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", @@ -58792,6 +63468,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", @@ -58960,6 +63643,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", @@ -59044,6 +63734,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", @@ -59198,6 +63895,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", @@ -59289,6 +63993,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", @@ -59422,6 +64133,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", @@ -59492,6 +64210,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", @@ -59527,6 +64252,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", @@ -59562,6 +64294,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", @@ -59597,6 +64336,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", @@ -59709,6 +64455,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", @@ -59744,6 +64497,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", @@ -59786,6 +64546,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", @@ -59821,6 +64588,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", @@ -59856,6 +64630,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", @@ -59898,6 +64679,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", @@ -59933,6 +64721,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", @@ -60024,6 +64819,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", @@ -60115,6 +64917,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", @@ -60150,6 +64959,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", @@ -60227,6 +65043,13 @@ "metric":"chrf", "score":0.5924241261 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", @@ -60234,6 +65057,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", @@ -60269,6 +65099,13 @@ "metric":"chrf", "score":0.6509885745 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", @@ -60423,6 +65260,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", @@ -60458,6 +65302,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", @@ -60549,6 +65400,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", @@ -60591,6 +65449,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", @@ -60633,6 +65498,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", @@ -60766,6 +65638,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", @@ -60927,6 +65806,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", @@ -60962,6 +65848,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", @@ -61004,6 +65897,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", @@ -61081,6 +65981,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", @@ -61116,6 +66023,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", @@ -61151,6 +66065,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", @@ -61375,6 +66296,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", @@ -61711,6 +66639,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ar", @@ -61823,6 +66758,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"as", @@ -61858,6 +66800,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"awa", @@ -61893,6 +66842,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"az", @@ -61928,6 +66884,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"be", @@ -61963,6 +66926,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bho", @@ -62117,6 +67087,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ceb", @@ -62152,6 +67129,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ckb", @@ -62432,6 +67416,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fa", @@ -62600,6 +67591,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"gu", @@ -62684,6 +67682,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hi", @@ -62838,6 +67843,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"id", @@ -62929,6 +67941,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ilo", @@ -63062,6 +68081,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"jv", @@ -63132,6 +68158,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kk", @@ -63167,6 +68200,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"km", @@ -63202,6 +68242,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kn", @@ -63237,6 +68284,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ko", @@ -63349,6 +68403,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mai", @@ -63384,6 +68445,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mg", @@ -63426,6 +68494,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ml", @@ -63461,6 +68536,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mr", @@ -63496,6 +68578,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ms", @@ -63538,6 +68627,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"my", @@ -63573,6 +68669,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ne", @@ -63664,6 +68767,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ny", @@ -63755,6 +68865,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"or", @@ -63790,6 +68907,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pa", @@ -63867,6 +68991,13 @@ "metric":"chrf", "score":0.5891983505 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pt", @@ -63874,6 +69005,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pt", @@ -63909,6 +69047,13 @@ "metric":"chrf", "score":0.6903236014 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ro", @@ -64063,6 +69208,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sd", @@ -64098,6 +69250,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"si", @@ -64189,6 +69348,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"so", @@ -64231,6 +69397,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sr", @@ -64273,6 +69446,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"su", @@ -64406,6 +69586,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ta", @@ -64567,6 +69754,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ti", @@ -64602,6 +69796,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tr", @@ -64644,6 +69845,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uk", @@ -64721,6 +69929,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ur", @@ -64756,6 +69971,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uz", @@ -64791,6 +70013,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"vi", @@ -65015,6 +70244,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yue", @@ -67647,6 +72883,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", @@ -67759,6 +73002,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", @@ -67794,6 +73044,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", @@ -67829,6 +73086,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", @@ -67864,6 +73128,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", @@ -67899,6 +73170,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", @@ -68053,6 +73331,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", @@ -68088,6 +73373,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", @@ -68368,6 +73660,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", @@ -68536,6 +73835,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", @@ -68620,6 +73926,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", @@ -68774,6 +74087,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", @@ -68865,6 +74185,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", @@ -68998,6 +74325,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", @@ -69068,6 +74402,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", @@ -69103,6 +74444,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", @@ -69138,6 +74486,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", @@ -69173,6 +74528,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", @@ -69285,6 +74647,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", @@ -69320,6 +74689,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", @@ -69362,6 +74738,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", @@ -69397,6 +74780,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", @@ -69432,6 +74822,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", @@ -69474,6 +74871,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", @@ -69509,6 +74913,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", @@ -69600,6 +75011,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", @@ -69691,6 +75109,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", @@ -69726,6 +75151,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", @@ -69803,6 +75235,13 @@ "metric":"chrf", "score":0.3907504991 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", @@ -69810,6 +75249,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", @@ -69845,6 +75291,13 @@ "metric":"chrf", "score":0.4401829864 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", @@ -69999,6 +75452,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", @@ -70034,6 +75494,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", @@ -70125,6 +75592,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", @@ -70167,6 +75641,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", @@ -70209,6 +75690,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", @@ -70342,6 +75830,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", @@ -70503,6 +75998,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", @@ -70538,6 +76040,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", @@ -70580,6 +76089,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", @@ -70657,6 +76173,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", @@ -70692,6 +76215,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", @@ -70727,6 +76257,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", @@ -70951,6 +76488,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", @@ -71287,6 +76831,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", @@ -71399,6 +76950,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", @@ -71434,6 +76992,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", @@ -71469,6 +77034,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", @@ -71504,6 +77076,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", @@ -71539,6 +77118,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", @@ -71693,6 +77279,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", @@ -71728,6 +77321,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", @@ -72008,6 +77608,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", @@ -72176,6 +77783,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", @@ -72260,6 +77874,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", @@ -72414,6 +78035,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", @@ -72505,6 +78133,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", @@ -72638,6 +78273,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", @@ -72708,6 +78350,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", @@ -72743,6 +78392,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", @@ -72778,6 +78434,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", @@ -72813,6 +78476,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", @@ -72925,6 +78595,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", @@ -72960,6 +78637,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", @@ -73002,6 +78686,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", @@ -73037,6 +78728,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", @@ -73072,6 +78770,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", @@ -73114,6 +78819,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", @@ -73149,6 +78861,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", @@ -73240,6 +78959,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", @@ -73331,6 +79057,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", @@ -73366,6 +79099,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", @@ -73443,6 +79183,13 @@ "metric":"chrf", "score":0.5541703282 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", @@ -73450,6 +79197,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", @@ -73485,6 +79239,13 @@ "metric":"chrf", "score":0.6132478102 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", @@ -73639,6 +79400,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", @@ -73674,6 +79442,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", @@ -73765,6 +79540,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", @@ -73807,6 +79589,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", @@ -73849,6 +79638,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", @@ -73982,6 +79778,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", @@ -74143,6 +79946,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", @@ -74178,6 +79988,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", @@ -74220,6 +80037,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", @@ -74297,6 +80121,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", @@ -74332,6 +80163,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", @@ -74367,6 +80205,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", @@ -74591,6 +80436,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", @@ -74927,6 +80779,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", @@ -75039,6 +80898,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", @@ -75074,6 +80940,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", @@ -75109,6 +80982,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", @@ -75144,6 +81024,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"be", @@ -75179,6 +81066,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bho", @@ -75333,6 +81227,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", @@ -75368,6 +81269,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ckb", @@ -75648,6 +81556,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", @@ -75816,6 +81731,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", @@ -75900,6 +81822,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", @@ -76054,6 +81983,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", @@ -76145,6 +82081,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ilo", @@ -76278,6 +82221,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", @@ -76348,6 +82298,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", @@ -76383,6 +82340,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", @@ -76418,6 +82382,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", @@ -76453,6 +82424,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", @@ -76565,6 +82543,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", @@ -76600,6 +82585,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", @@ -76642,6 +82634,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", @@ -76677,6 +82676,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", @@ -76712,6 +82718,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", @@ -76754,6 +82767,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", @@ -76789,6 +82809,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", @@ -76880,6 +82907,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", @@ -76971,6 +83005,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", @@ -77006,6 +83047,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", @@ -77083,6 +83131,13 @@ "metric":"chrf", "score":0.5569354008 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", @@ -77090,6 +83145,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", @@ -77125,6 +83187,13 @@ "metric":"chrf", "score":0.6761551234 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", @@ -77279,6 +83348,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", @@ -77314,6 +83390,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", @@ -77405,6 +83488,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", @@ -77447,6 +83537,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", @@ -77489,6 +83586,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", @@ -77622,6 +83726,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", @@ -77783,6 +83894,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ti", @@ -77818,6 +83936,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", @@ -77860,6 +83985,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", @@ -77937,6 +84069,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", @@ -77972,6 +84111,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", @@ -78007,6 +84153,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", @@ -78231,6 +84384,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", @@ -83047,6 +89207,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", @@ -83159,6 +89326,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", @@ -83194,6 +89368,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", @@ -83229,6 +89410,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", @@ -83264,6 +89452,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", @@ -83299,6 +89494,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", @@ -83453,6 +89655,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", @@ -83488,6 +89697,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", @@ -83768,6 +89984,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", @@ -83936,6 +90159,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", @@ -84020,6 +90250,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", @@ -84174,6 +90411,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", @@ -84265,6 +90509,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", @@ -84398,6 +90649,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", @@ -84468,6 +90726,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", @@ -84503,6 +90768,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", @@ -84538,6 +90810,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", @@ -84573,6 +90852,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", @@ -84685,6 +90971,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", @@ -84720,6 +91013,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", @@ -84762,6 +91062,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", @@ -84797,6 +91104,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", @@ -84832,6 +91146,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", @@ -84874,6 +91195,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", @@ -84909,6 +91237,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", @@ -85000,6 +91335,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", @@ -85091,6 +91433,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", @@ -85126,6 +91475,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", @@ -85203,6 +91559,13 @@ "metric":"chrf", "score":0.5550868321 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", @@ -85210,6 +91573,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", @@ -85245,6 +91615,13 @@ "metric":"chrf", "score":0.6712742861 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", @@ -85399,6 +91776,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", @@ -85434,6 +91818,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", @@ -85525,6 +91916,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", @@ -85567,6 +91965,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", @@ -85609,6 +92014,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", @@ -85742,6 +92154,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", @@ -85903,6 +92322,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", @@ -85938,6 +92364,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", @@ -85980,6 +92413,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", @@ -86057,6 +92497,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", @@ -86092,6 +92539,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", @@ -86127,6 +92581,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", @@ -86351,6 +92812,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", @@ -86687,6 +93155,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", @@ -86799,6 +93274,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", @@ -86834,6 +93316,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", @@ -86869,6 +93358,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", @@ -86904,6 +93400,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", @@ -86939,6 +93442,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", @@ -87093,6 +93603,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", @@ -87128,6 +93645,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", @@ -87408,6 +93932,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", @@ -87576,6 +94107,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", @@ -87660,6 +94198,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", @@ -87814,6 +94359,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", @@ -87905,6 +94457,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", @@ -88038,6 +94597,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", @@ -88108,6 +94674,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", @@ -88143,6 +94716,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", @@ -88178,6 +94758,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", @@ -88213,6 +94800,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", @@ -88325,6 +94919,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", @@ -88360,6 +94961,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", @@ -88402,6 +95010,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", @@ -88437,6 +95052,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", @@ -88472,6 +95094,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", @@ -88514,6 +95143,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", @@ -88549,6 +95185,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", @@ -88640,6 +95283,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", @@ -88731,6 +95381,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", @@ -88766,6 +95423,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", @@ -88843,6 +95507,13 @@ "metric":"chrf", "score":0.5407300006 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", @@ -88850,6 +95521,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", @@ -88885,6 +95563,13 @@ "metric":"chrf", "score":0.6491023878 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", @@ -89039,6 +95724,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", @@ -89074,6 +95766,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", @@ -89165,6 +95864,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", @@ -89207,6 +95913,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", @@ -89249,6 +95962,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", @@ -89382,6 +96102,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", @@ -89543,6 +96270,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", @@ -89578,6 +96312,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", @@ -89620,6 +96361,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", @@ -89697,6 +96445,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", @@ -89732,6 +96487,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", @@ -89767,6 +96529,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", @@ -89991,6 +96760,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", @@ -90327,6 +97103,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"ar", @@ -90439,6 +97222,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4", "bcp_47":"as", @@ -90474,6 +97264,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"microsoft\/phi-4", "bcp_47":"awa", @@ -90509,6 +97306,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4", "bcp_47":"az", @@ -90544,6 +97348,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"be", @@ -90579,6 +97390,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4", "bcp_47":"bho", @@ -90733,6 +97551,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"microsoft\/phi-4", "bcp_47":"ceb", @@ -90768,6 +97593,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4", "bcp_47":"ckb", @@ -91048,6 +97880,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"fa", @@ -91216,6 +98055,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4", "bcp_47":"gu", @@ -91300,6 +98146,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"hi", @@ -91454,6 +98307,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"microsoft\/phi-4", "bcp_47":"id", @@ -91545,6 +98405,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4", "bcp_47":"ilo", @@ -91678,6 +98545,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"jv", @@ -91748,6 +98622,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"microsoft\/phi-4", "bcp_47":"kk", @@ -91783,6 +98664,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4", "bcp_47":"km", @@ -91818,6 +98706,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4", "bcp_47":"kn", @@ -91853,6 +98748,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"microsoft\/phi-4", "bcp_47":"ko", @@ -91965,6 +98867,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4", "bcp_47":"mai", @@ -92000,6 +98909,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4", "bcp_47":"mg", @@ -92042,6 +98958,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4", "bcp_47":"ml", @@ -92077,6 +99000,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"mr", @@ -92112,6 +99042,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4", "bcp_47":"ms", @@ -92154,6 +99091,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4", "bcp_47":"my", @@ -92189,6 +99133,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4", "bcp_47":"ne", @@ -92280,6 +99231,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4", "bcp_47":"ny", @@ -92371,6 +99329,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4", "bcp_47":"or", @@ -92406,6 +99371,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4", "bcp_47":"pa", @@ -92483,6 +99455,13 @@ "metric":"chrf", "score":0.4754992095 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"microsoft\/phi-4", "bcp_47":"pt", @@ -92490,6 +99469,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4", "bcp_47":"pt", @@ -92525,6 +99511,13 @@ "metric":"chrf", "score":0.5827805827 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4", "bcp_47":"ro", @@ -92679,6 +99672,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4", "bcp_47":"sd", @@ -92714,6 +99714,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4", "bcp_47":"si", @@ -92805,6 +99812,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4", "bcp_47":"so", @@ -92847,6 +99861,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"microsoft\/phi-4", "bcp_47":"sr", @@ -92889,6 +99910,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4", "bcp_47":"su", @@ -93022,6 +100050,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4", "bcp_47":"ta", @@ -93183,6 +100218,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4", "bcp_47":"ti", @@ -93218,6 +100260,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"microsoft\/phi-4", "bcp_47":"tr", @@ -93260,6 +100309,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"microsoft\/phi-4", "bcp_47":"uk", @@ -93337,6 +100393,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"microsoft\/phi-4", "bcp_47":"ur", @@ -93372,6 +100435,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4", "bcp_47":"uz", @@ -93407,6 +100477,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"microsoft\/phi-4", "bcp_47":"vi", @@ -93631,6 +100708,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4", "bcp_47":"yue", @@ -93967,6 +101051,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", @@ -94079,6 +101170,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", @@ -94114,6 +101212,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", @@ -94149,6 +101254,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", @@ -94184,6 +101296,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"be", @@ -94219,6 +101338,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", @@ -94373,6 +101499,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", @@ -94408,6 +101541,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ckb", @@ -94688,6 +101828,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", @@ -94856,6 +102003,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", @@ -94940,6 +102094,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", @@ -95094,6 +102255,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", @@ -95185,6 +102353,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ilo", @@ -95318,6 +102493,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", @@ -95388,6 +102570,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", @@ -95423,6 +102612,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", @@ -95458,6 +102654,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", @@ -95493,6 +102696,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", @@ -95605,6 +102815,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", @@ -95640,6 +102857,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", @@ -95682,6 +102906,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", @@ -95717,6 +102948,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", @@ -95752,6 +102990,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", @@ -95794,6 +103039,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", @@ -95829,6 +103081,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", @@ -95920,6 +103179,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", @@ -96011,6 +103277,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", @@ -96046,6 +103319,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", @@ -96123,6 +103403,13 @@ "metric":"chrf", "score":0.2865629267 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", @@ -96130,6 +103417,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", @@ -96165,6 +103459,13 @@ "metric":"chrf", "score":0.3618255907 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", @@ -96319,6 +103620,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", @@ -96354,6 +103662,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"si", @@ -96445,6 +103760,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"so", @@ -96487,6 +103809,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sr", @@ -96529,6 +103858,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"su", @@ -96662,6 +103998,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ta", @@ -96823,6 +104166,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ti", @@ -96858,6 +104208,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tr", @@ -96900,6 +104257,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uk", @@ -96977,6 +104341,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ur", @@ -97012,6 +104383,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uz", @@ -97047,6 +104425,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"vi", @@ -97271,6 +104656,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yue", @@ -97607,6 +104999,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ar", @@ -97719,6 +105118,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"as", @@ -97754,6 +105160,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"awa", @@ -97789,6 +105202,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"az", @@ -97824,6 +105244,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"be", @@ -97859,6 +105286,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bho", @@ -98013,6 +105447,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", @@ -98048,6 +105489,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", @@ -98328,6 +105776,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fa", @@ -98496,6 +105951,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"gu", @@ -98580,6 +106042,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hi", @@ -98734,6 +106203,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"id", @@ -98825,6 +106301,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", @@ -98958,6 +106441,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"jv", @@ -99028,6 +106518,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kk", @@ -99063,6 +106560,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"km", @@ -99098,6 +106602,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kn", @@ -99133,6 +106644,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ko", @@ -99245,6 +106763,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mai", @@ -99280,6 +106805,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mg", @@ -99322,6 +106854,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ml", @@ -99357,6 +106896,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mr", @@ -99392,6 +106938,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ms", @@ -99434,6 +106987,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"my", @@ -99469,6 +107029,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ne", @@ -99560,6 +107127,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ny", @@ -99651,6 +107225,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"or", @@ -99686,6 +107267,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pa", @@ -99763,6 +107351,13 @@ "metric":"chrf", "score":0.4488014348 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pt", @@ -99770,6 +107365,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pt", @@ -99805,6 +107407,13 @@ "metric":"chrf", "score":0.5835851988 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ro", @@ -99959,6 +107568,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sd", @@ -99994,6 +107610,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"si", @@ -100085,6 +107708,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"so", @@ -100127,6 +107757,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sr", @@ -100169,6 +107806,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"su", @@ -100302,6 +107946,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ta", @@ -100463,6 +108114,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ti", @@ -100498,6 +108156,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tr", @@ -100540,6 +108205,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uk", @@ -100617,6 +108289,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ur", @@ -100652,6 +108331,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uz", @@ -100687,6 +108373,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"vi", @@ -100911,6 +108604,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yue", @@ -101247,6 +108947,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ar", @@ -101359,6 +109066,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"as", @@ -101394,6 +109108,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"awa", @@ -101429,6 +109150,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"az", @@ -101464,6 +109192,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"be", @@ -101499,6 +109234,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"bho", @@ -101653,6 +109395,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ceb", @@ -101688,6 +109437,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ckb", @@ -101968,6 +109724,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"fa", @@ -102136,6 +109899,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"gu", @@ -102220,6 +109990,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"hi", @@ -102374,6 +110151,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"id", @@ -102465,6 +110249,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ilo", @@ -102598,6 +110389,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"jv", @@ -102668,6 +110466,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"kk", @@ -102703,6 +110508,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"km", @@ -102738,6 +110550,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"kn", @@ -102773,6 +110592,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ko", @@ -102885,6 +110711,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"mai", @@ -102920,6 +110753,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"mg", @@ -102962,6 +110802,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ml", @@ -102997,6 +110844,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"mr", @@ -103032,6 +110886,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ms", @@ -103074,6 +110935,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"my", @@ -103109,6 +110977,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ne", @@ -103200,6 +111075,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ny", @@ -103291,6 +111173,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"or", @@ -103326,6 +111215,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"pa", @@ -103403,6 +111299,13 @@ "metric":"chrf", "score":0.525989117 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"pt", @@ -103410,6 +111313,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"pt", @@ -103445,6 +111355,13 @@ "metric":"chrf", "score":0.6843169799 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ro", @@ -103599,6 +111516,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"sd", @@ -103634,6 +111558,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"si", @@ -103725,6 +111656,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"so", @@ -103767,6 +111705,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"sr", @@ -103809,6 +111754,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"su", @@ -103942,6 +111894,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ta", @@ -104103,6 +112062,13 @@ "metric":"accuracy", "score":0.2 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ti", @@ -104138,6 +112104,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"tr", @@ -104180,6 +112153,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"uk", @@ -104257,6 +112237,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"ur", @@ -104292,6 +112279,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"uz", @@ -104327,6 +112321,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"vi", @@ -104551,6 +112552,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-saba", "bcp_47":"yue", @@ -104887,6 +112895,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", @@ -104999,6 +113014,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", @@ -105034,6 +113056,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", @@ -105069,6 +113098,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", @@ -105104,6 +113140,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", @@ -105139,6 +113182,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", @@ -105293,6 +113343,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", @@ -105328,6 +113385,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", @@ -105608,6 +113672,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", @@ -105776,6 +113847,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", @@ -105860,6 +113938,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", @@ -106014,6 +114099,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", @@ -106105,6 +114197,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", @@ -106238,6 +114337,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", @@ -106308,6 +114414,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", @@ -106343,6 +114456,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", @@ -106378,6 +114498,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", @@ -106413,6 +114540,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", @@ -106525,6 +114659,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", @@ -106560,6 +114701,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", @@ -106602,6 +114750,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", @@ -106637,6 +114792,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", @@ -106672,6 +114834,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", @@ -106714,6 +114883,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", @@ -106749,6 +114925,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", @@ -106840,6 +115023,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", @@ -106931,6 +115121,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", @@ -106966,6 +115163,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", @@ -107043,6 +115247,13 @@ "metric":"chrf", "score":0.4948927457 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", @@ -107050,6 +115261,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", @@ -107085,6 +115303,13 @@ "metric":"chrf", "score":0.6102742767 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", @@ -107239,6 +115464,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", @@ -107274,6 +115506,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", @@ -107365,6 +115604,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", @@ -107407,6 +115653,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", @@ -107449,6 +115702,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", @@ -107582,6 +115842,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", @@ -107743,6 +116010,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", @@ -107778,6 +116052,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", @@ -107820,6 +116101,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", @@ -107897,6 +116185,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", @@ -107932,6 +116227,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", @@ -107967,6 +116269,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", @@ -108191,6 +116500,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", @@ -112167,6 +120483,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", @@ -112279,6 +120602,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"as", @@ -112314,6 +120644,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", @@ -112349,6 +120686,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"az", @@ -112384,6 +120728,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"be", @@ -112419,6 +120770,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", @@ -112573,6 +120931,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", @@ -112608,6 +120973,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", @@ -112888,6 +121260,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", @@ -113056,6 +121435,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", @@ -113140,6 +121526,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", @@ -113294,6 +121687,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"id", @@ -113385,6 +121785,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", @@ -113518,6 +121925,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", @@ -113588,6 +122002,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", @@ -113623,6 +122044,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"km", @@ -113658,6 +122086,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", @@ -113693,6 +122128,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", @@ -113805,6 +122247,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", @@ -113840,6 +122289,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", @@ -113882,6 +122338,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", @@ -113917,6 +122380,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", @@ -113952,6 +122422,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", @@ -113994,6 +122471,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"my", @@ -114029,6 +122513,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", @@ -114120,6 +122611,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", @@ -114211,6 +122709,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"or", @@ -114246,6 +122751,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", @@ -114323,6 +122835,13 @@ "metric":"chrf", "score":0.5674744623 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", @@ -114330,6 +122849,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", @@ -114365,6 +122891,13 @@ "metric":"chrf", "score":0.6526848356 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", @@ -114519,6 +123052,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", @@ -114554,6 +123094,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"si", @@ -114645,6 +123192,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"so", @@ -114687,6 +123241,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", @@ -114729,6 +123290,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"su", @@ -114862,6 +123430,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", @@ -115023,6 +123598,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", @@ -115058,6 +123640,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", @@ -115100,6 +123689,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", @@ -115177,6 +123773,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", @@ -115212,6 +123815,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", @@ -115247,6 +123857,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", @@ -115471,6 +124088,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", @@ -115807,6 +124431,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", @@ -115919,6 +124550,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"as", @@ -115954,6 +124592,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", @@ -115989,6 +124634,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"az", @@ -116024,6 +124676,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"be", @@ -116059,6 +124718,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", @@ -116213,6 +124879,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", @@ -116248,6 +124921,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", @@ -116528,6 +125208,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", @@ -116696,6 +125383,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", @@ -116780,6 +125474,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", @@ -116934,6 +125635,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"id", @@ -117025,6 +125733,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", @@ -117158,6 +125873,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", @@ -117228,6 +125950,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", @@ -117263,6 +125992,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"km", @@ -117298,6 +126034,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", @@ -117333,6 +126076,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", @@ -117445,6 +126195,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", @@ -117480,6 +126237,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", @@ -117522,6 +126286,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", @@ -117557,6 +126328,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", @@ -117592,6 +126370,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", @@ -117634,6 +126419,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"my", @@ -117669,6 +126461,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", @@ -117760,6 +126559,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", @@ -117851,6 +126657,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"or", @@ -117886,6 +126699,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", @@ -117963,6 +126783,13 @@ "metric":"chrf", "score":0.5592753275 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", @@ -117970,6 +126797,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", @@ -118005,6 +126839,13 @@ "metric":"chrf", "score":0.6189446172 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", @@ -118159,6 +127000,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", @@ -118194,6 +127042,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"si", @@ -118285,6 +127140,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"so", @@ -118327,6 +127189,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", @@ -118369,6 +127238,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"su", @@ -118502,6 +127378,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", @@ -118663,6 +127546,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", @@ -118698,6 +127588,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", @@ -118740,6 +127637,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", @@ -118817,6 +127721,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", @@ -118852,6 +127763,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", @@ -118887,6 +127805,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", @@ -119111,6 +128036,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", @@ -119447,6 +128379,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ar", @@ -119559,6 +128498,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"as", @@ -119594,6 +128540,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"awa", @@ -119629,6 +128582,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"az", @@ -119664,6 +128624,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"be", @@ -119699,6 +128666,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bho", @@ -119853,6 +128827,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", @@ -119888,6 +128869,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ckb", @@ -120168,6 +129156,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fa", @@ -120336,6 +129331,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"gu", @@ -120420,6 +129422,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hi", @@ -120574,6 +129583,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"id", @@ -120665,6 +129681,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ilo", @@ -120798,6 +129821,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"jv", @@ -120868,6 +129898,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kk", @@ -120903,6 +129940,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"km", @@ -120938,6 +129982,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kn", @@ -120973,6 +130024,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ko", @@ -121085,6 +130143,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mai", @@ -121120,6 +130185,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mg", @@ -121162,6 +130234,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ml", @@ -121197,6 +130276,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mr", @@ -121232,6 +130318,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ms", @@ -121274,6 +130367,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"my", @@ -121309,6 +130409,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ne", @@ -121400,6 +130507,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ny", @@ -121491,6 +130605,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"or", @@ -121526,6 +130647,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pa", @@ -121603,6 +130731,13 @@ "metric":"chrf", "score":0.535772663 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pt", @@ -121610,6 +130745,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pt", @@ -121645,6 +130787,13 @@ "metric":"chrf", "score":0.6773057972 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ro", @@ -121799,6 +130948,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sd", @@ -121834,6 +130990,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"si", @@ -121925,6 +131088,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"so", @@ -121967,6 +131137,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sr", @@ -122009,6 +131186,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"su", @@ -122142,6 +131326,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ta", @@ -122303,6 +131494,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ti", @@ -122338,6 +131536,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tr", @@ -122380,6 +131585,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uk", @@ -122457,6 +131669,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ur", @@ -122492,6 +131711,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uz", @@ -122527,6 +131753,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"vi", @@ -122751,6 +131984,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yue",