diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -69,6 +69,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ak", @@ -83,6 +90,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ak", @@ -167,6 +181,13 @@ "metric":"chrf", "score":0.1712917218 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"apc", @@ -202,6 +223,13 @@ "metric":"chrf", "score":0.4728505876 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ar", @@ -321,6 +349,13 @@ "metric":"chrf", "score":0.4094860171 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"as", @@ -370,6 +405,13 @@ "metric":"chrf", "score":0.322658401 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"awa", @@ -419,6 +461,13 @@ "metric":"chrf", "score":0.4311272979 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"az", @@ -510,6 +559,13 @@ "metric":"chrf", "score":0.3854296145 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bho", @@ -594,6 +650,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bn", @@ -678,6 +741,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ceb", @@ -769,6 +839,13 @@ "metric":"chrf", "score":0.2329401033 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"cs", @@ -783,6 +860,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"cs", @@ -811,6 +895,13 @@ "metric":"chrf", "score":0.4337679078 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"de", @@ -860,6 +951,13 @@ "metric":"chrf", "score":0.6258445826 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"el", @@ -874,6 +972,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"el", @@ -958,6 +1063,13 @@ "metric":"chrf", "score":0.7810071072 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"es", @@ -1014,6 +1126,13 @@ "metric":"chrf", "score":0.5837022928 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fa", @@ -1105,6 +1224,13 @@ "metric":"chrf", "score":0.5327770983 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fr", @@ -1189,6 +1315,13 @@ "metric":"chrf", "score":0.2071693978 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"gu", @@ -1294,6 +1427,13 @@ "metric":"chrf", "score":0.3884447474 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hi", @@ -1420,6 +1560,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hu", @@ -1434,6 +1581,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hu", @@ -1462,6 +1616,13 @@ "metric":"chrf", "score":0.4885220189 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"id", @@ -1511,6 +1672,13 @@ "metric":"chrf", "score":0.5608938423 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ig", @@ -1602,6 +1770,13 @@ "metric":"chrf", "score":0.3408267624 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"it", @@ -1651,6 +1826,13 @@ "metric":"chrf", "score":0.5675711887 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ja", @@ -1700,6 +1882,13 @@ "metric":"chrf", "score":0.4822747548 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"jv", @@ -1784,6 +1973,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kk", @@ -1798,6 +1994,13 @@ "metric":"accuracy", "score":0.1 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kk", @@ -1826,6 +2029,13 @@ "metric":"chrf", "score":0.3845090606 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"km", @@ -1875,6 +2085,13 @@ "metric":"chrf", "score":0.2932951073 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kn", @@ -1924,6 +2141,13 @@ "metric":"chrf", "score":0.3528205749 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ko", @@ -2043,6 +2267,13 @@ "metric":"chrf", "score":0.5208833255 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mai", @@ -2092,6 +2323,13 @@ "metric":"chrf", "score":0.4143082353 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mg", @@ -2141,6 +2379,13 @@ "metric":"chrf", "score":0.3265811196 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ml", @@ -2190,6 +2435,13 @@ "metric":"chrf", "score":0.4042462159 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mr", @@ -2239,6 +2491,13 @@ "metric":"chrf", "score":0.3320956129 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ms", @@ -2288,6 +2547,13 @@ "metric":"chrf", "score":0.6108991322 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"my", @@ -2337,6 +2603,13 @@ "metric":"chrf", "score":0.3897491958 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ne", @@ -2386,6 +2659,13 @@ "metric":"chrf", "score":0.3950140706 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"nl", @@ -2435,6 +2715,13 @@ "metric":"chrf", "score":0.5528589826 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.2 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ny", @@ -2484,6 +2771,13 @@ "metric":"chrf", "score":0.219484121 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"om", @@ -2533,6 +2827,13 @@ "metric":"chrf", "score":0.1905807428 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"or", @@ -2582,6 +2883,13 @@ "metric":"chrf", "score":0.3143126503 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pa", @@ -2631,6 +2939,13 @@ "metric":"chrf", "score":0.4931240563 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pl", @@ -2680,6 +2995,13 @@ "metric":"chrf", "score":0.5129096175 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ps", @@ -2694,6 +3016,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pt", @@ -2750,6 +3079,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ro", @@ -2799,6 +3135,13 @@ "metric":"chrf", "score":0.57049006 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ru", @@ -2890,6 +3233,13 @@ "metric":"chrf", "score":0.2723962004 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sd", @@ -2939,6 +3289,13 @@ "metric":"chrf", "score":0.2791093079 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"si", @@ -2988,6 +3345,13 @@ "metric":"chrf", "score":0.2956403655 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sn", @@ -3002,6 +3366,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sn", @@ -3030,6 +3401,13 @@ "metric":"chrf", "score":0.2850725298 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"so", @@ -3079,6 +3457,13 @@ "metric":"chrf", "score":0.2483400713 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sr", @@ -3128,6 +3513,13 @@ "metric":"chrf", "score":0.4975137727 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"su", @@ -3177,6 +3569,13 @@ "metric":"chrf", "score":0.3898736967 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sv", @@ -3191,6 +3590,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sv", @@ -3275,6 +3681,20 @@ "metric":"chrf", "score":0.5610336232 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ta", @@ -3324,6 +3744,13 @@ "metric":"chrf", "score":0.4166151764 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"te", @@ -3408,6 +3835,13 @@ "metric":"chrf", "score":0.3114796779 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"th", @@ -3499,6 +3933,13 @@ "metric":"chrf", "score":0.1093782195 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tr", @@ -3548,6 +3989,13 @@ "metric":"chrf", "score":0.5473239267 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uk", @@ -3632,6 +4080,13 @@ "metric":"chrf", "score":0.0937946455 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ur", @@ -3681,6 +4136,13 @@ "metric":"chrf", "score":0.3475324071 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uz", @@ -3730,6 +4192,13 @@ "metric":"chrf", "score":0.4435726767 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"vi", @@ -3954,6 +4423,20 @@ "metric":"chrf", "score":0.1841004492 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yue", @@ -4080,6 +4563,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zu", @@ -4108,6 +4598,13 @@ "metric":"chrf", "score":0.2709457919 }, + { + "model":"amazon\/nova-micro-v1", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"aeb", @@ -4178,6 +4675,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ak", @@ -4192,6 +4696,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ak", @@ -4276,6 +4787,13 @@ "metric":"chrf", "score":0.3683119816 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"apc", @@ -4311,6 +4829,13 @@ "metric":"chrf", "score":0.4932559294 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ar", @@ -4430,6 +4955,13 @@ "metric":"chrf", "score":0.4668847579 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"as", @@ -4479,6 +5011,13 @@ "metric":"chrf", "score":0.4135133991 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"awa", @@ -4528,6 +5067,13 @@ "metric":"chrf", "score":0.36455729 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"az", @@ -4619,6 +5165,13 @@ "metric":"chrf", "score":0.4981195393 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bho", @@ -4703,6 +5256,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bn", @@ -4787,6 +5347,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ceb", @@ -4878,6 +5445,13 @@ "metric":"chrf", "score":0.5213025666 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"cs", @@ -4892,6 +5466,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"cs", @@ -4920,6 +5501,13 @@ "metric":"chrf", "score":0.6321620897 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"de", @@ -4969,6 +5557,13 @@ "metric":"chrf", "score":0.6782242157 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"el", @@ -4983,6 +5578,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"el", @@ -5067,6 +5669,13 @@ "metric":"chrf", "score":0.8563332446 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"es", @@ -5123,6 +5732,13 @@ "metric":"chrf", "score":0.6134636944 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fa", @@ -5214,6 +5830,13 @@ "metric":"chrf", "score":0.6015751183 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fr", @@ -5298,6 +5921,13 @@ "metric":"chrf", "score":0.243334857 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"gu", @@ -5403,6 +6033,13 @@ "metric":"chrf", "score":0.4991915153 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hi", @@ -5529,6 +6166,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hu", @@ -5543,6 +6187,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hu", @@ -5571,6 +6222,13 @@ "metric":"chrf", "score":0.5955885461 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"id", @@ -5620,6 +6278,13 @@ "metric":"chrf", "score":0.6643987333 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ig", @@ -5711,6 +6376,13 @@ "metric":"chrf", "score":0.5195207754 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"it", @@ -5760,6 +6432,13 @@ "metric":"chrf", "score":0.6354726766 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ja", @@ -5809,6 +6488,13 @@ "metric":"chrf", "score":0.4600408983 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"jv", @@ -5893,6 +6579,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kk", @@ -5907,6 +6600,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kk", @@ -5935,6 +6635,13 @@ "metric":"chrf", "score":0.5710759927 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"km", @@ -5984,6 +6691,13 @@ "metric":"chrf", "score":0.4380696418 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kn", @@ -6033,6 +6747,13 @@ "metric":"chrf", "score":0.5340229728 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ko", @@ -6152,6 +6873,13 @@ "metric":"chrf", "score":0.4840324931 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mai", @@ -6201,6 +6929,13 @@ "metric":"chrf", "score":0.4883086081 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mg", @@ -6250,6 +6985,13 @@ "metric":"chrf", "score":0.5212112142 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ml", @@ -6299,6 +7041,13 @@ "metric":"chrf", "score":0.5673214411 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mr", @@ -6348,6 +7097,13 @@ "metric":"chrf", "score":0.4967353717 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ms", @@ -6397,6 +7153,13 @@ "metric":"chrf", "score":0.7361702362 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"my", @@ -6446,6 +7209,13 @@ "metric":"chrf", "score":0.5575622672 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ne", @@ -6495,6 +7265,13 @@ "metric":"chrf", "score":0.5172440312 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"nl", @@ -6544,6 +7321,13 @@ "metric":"chrf", "score":0.6210548081 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ny", @@ -6593,6 +7377,13 @@ "metric":"chrf", "score":0.4684888109 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"om", @@ -6642,6 +7433,13 @@ "metric":"chrf", "score":0.4043636025 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"or", @@ -6691,6 +7489,13 @@ "metric":"chrf", "score":0.471197857 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pa", @@ -6740,6 +7545,13 @@ "metric":"chrf", "score":0.6057762292 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pl", @@ -6789,6 +7601,13 @@ "metric":"chrf", "score":0.5811687089 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ps", @@ -6803,6 +7622,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pt", @@ -6859,6 +7685,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ro", @@ -6908,6 +7741,13 @@ "metric":"chrf", "score":0.7223799311 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ru", @@ -6999,6 +7839,13 @@ "metric":"chrf", "score":0.5032987767 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sd", @@ -7048,6 +7895,13 @@ "metric":"chrf", "score":0.4785817971 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"si", @@ -7097,6 +7951,13 @@ "metric":"chrf", "score":0.468771605 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sn", @@ -7111,6 +7972,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sn", @@ -7139,6 +8007,13 @@ "metric":"chrf", "score":0.406366105 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"so", @@ -7188,6 +8063,13 @@ "metric":"chrf", "score":0.4927220926 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sr", @@ -7237,6 +8119,13 @@ "metric":"chrf", "score":0.6367420245 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"su", @@ -7286,6 +8175,13 @@ "metric":"chrf", "score":0.5103267256 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sv", @@ -7300,6 +8196,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sv", @@ -7384,6 +8287,20 @@ "metric":"chrf", "score":0.6665904527 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ta", @@ -7433,6 +8350,13 @@ "metric":"chrf", "score":0.5908056148 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"te", @@ -7517,6 +8441,13 @@ "metric":"chrf", "score":0.477780812 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"th", @@ -7608,6 +8539,13 @@ "metric":"chrf", "score":0.2911678276 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tr", @@ -7657,6 +8595,13 @@ "metric":"chrf", "score":0.5965516262 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uk", @@ -7741,6 +8686,13 @@ "metric":"chrf", "score":0.2728382878 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ur", @@ -7790,6 +8742,13 @@ "metric":"chrf", "score":0.476774558 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uz", @@ -7839,6 +8798,13 @@ "metric":"chrf", "score":0.6164677172 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"vi", @@ -8063,6 +9029,20 @@ "metric":"chrf", "score":0.2293754958 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yue", @@ -8189,6 +9169,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zu", @@ -8217,6 +9204,13 @@ "metric":"chrf", "score":0.5616917702 }, + { + "model":"anthropic\/claude-3.5-sonnet", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"aeb", @@ -8287,6 +9281,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ak", @@ -8301,6 +9302,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ak", @@ -8385,6 +9393,13 @@ "metric":"chrf", "score":0.3844495283 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"apc", @@ -8420,6 +9435,13 @@ "metric":"chrf", "score":0.5299097797 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ar", @@ -8539,6 +9561,13 @@ "metric":"chrf", "score":0.5026533348 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"as", @@ -8588,6 +9617,13 @@ "metric":"chrf", "score":0.3948207636 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"awa", @@ -8637,6 +9673,13 @@ "metric":"chrf", "score":0.4275815242 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"az", @@ -8728,6 +9771,13 @@ "metric":"chrf", "score":0.4904369651 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bho", @@ -8812,6 +9862,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bn", @@ -8896,6 +9953,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ceb", @@ -8987,6 +10051,13 @@ "metric":"chrf", "score":0.5528408781 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"cs", @@ -9001,6 +10072,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"cs", @@ -9029,6 +10107,13 @@ "metric":"chrf", "score":0.592911966 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"de", @@ -9078,6 +10163,13 @@ "metric":"chrf", "score":0.693325521 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"el", @@ -9092,6 +10184,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"el", @@ -9176,6 +10275,13 @@ "metric":"chrf", "score":0.858877842 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"es", @@ -9232,6 +10338,13 @@ "metric":"chrf", "score":0.6216604607 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fa", @@ -9323,6 +10436,13 @@ "metric":"chrf", "score":0.6302138792 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fr", @@ -9407,6 +10527,13 @@ "metric":"chrf", "score":0.27092494 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"gu", @@ -9512,6 +10639,13 @@ "metric":"chrf", "score":0.478395209 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hi", @@ -9638,6 +10772,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hu", @@ -9652,6 +10793,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hu", @@ -9680,6 +10828,13 @@ "metric":"chrf", "score":0.5905610326 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"id", @@ -9729,6 +10884,13 @@ "metric":"chrf", "score":0.6937537754 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ig", @@ -9820,6 +10982,13 @@ "metric":"chrf", "score":0.5360266274 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"it", @@ -9869,6 +11038,13 @@ "metric":"chrf", "score":0.617255004 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ja", @@ -9918,6 +11094,13 @@ "metric":"chrf", "score":0.4189814818 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"jv", @@ -10002,6 +11185,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kk", @@ -10016,6 +11206,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kk", @@ -10044,6 +11241,13 @@ "metric":"chrf", "score":0.5503591158 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"km", @@ -10093,6 +11297,13 @@ "metric":"chrf", "score":0.423812318 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kn", @@ -10142,6 +11353,13 @@ "metric":"chrf", "score":0.5401137308 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ko", @@ -10261,6 +11479,13 @@ "metric":"chrf", "score":0.5581098509 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mai", @@ -10310,6 +11535,13 @@ "metric":"chrf", "score":0.5038938769 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mg", @@ -10359,6 +11591,13 @@ "metric":"chrf", "score":0.5343730926 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ml", @@ -10408,6 +11647,13 @@ "metric":"chrf", "score":0.5664991748 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mr", @@ -10457,6 +11703,13 @@ "metric":"chrf", "score":0.5134253387 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ms", @@ -10506,6 +11759,13 @@ "metric":"chrf", "score":0.7162987249 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"my", @@ -10555,6 +11815,13 @@ "metric":"chrf", "score":0.5325214597 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ne", @@ -10604,6 +11871,13 @@ "metric":"chrf", "score":0.535796278 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"nl", @@ -10653,6 +11927,13 @@ "metric":"chrf", "score":0.6133721509 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ny", @@ -10702,6 +11983,13 @@ "metric":"chrf", "score":0.4828092948 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"om", @@ -10751,6 +12039,13 @@ "metric":"chrf", "score":0.407269173 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"or", @@ -10800,6 +12095,13 @@ "metric":"chrf", "score":0.455460052 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pa", @@ -10849,6 +12151,13 @@ "metric":"chrf", "score":0.6165646404 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pl", @@ -10898,6 +12207,13 @@ "metric":"chrf", "score":0.5826652331 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ps", @@ -10912,6 +12228,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pt", @@ -10968,6 +12291,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ro", @@ -11017,6 +12347,13 @@ "metric":"chrf", "score":0.7003733903 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ru", @@ -11108,6 +12445,13 @@ "metric":"chrf", "score":0.5044002449 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sd", @@ -11157,6 +12501,13 @@ "metric":"chrf", "score":0.4992460758 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"si", @@ -11206,6 +12557,13 @@ "metric":"chrf", "score":0.4645319126 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sn", @@ -11220,6 +12578,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sn", @@ -11248,6 +12613,13 @@ "metric":"chrf", "score":0.3902046622 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"so", @@ -11297,6 +12669,13 @@ "metric":"chrf", "score":0.4964169161 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sr", @@ -11346,6 +12725,13 @@ "metric":"chrf", "score":0.6288500197 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"su", @@ -11395,6 +12781,13 @@ "metric":"chrf", "score":0.5309758013 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sv", @@ -11409,6 +12802,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sv", @@ -11493,6 +12893,20 @@ "metric":"chrf", "score":0.6470689802 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ta", @@ -11542,6 +12956,13 @@ "metric":"chrf", "score":0.5995810459 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"te", @@ -11626,6 +13047,13 @@ "metric":"chrf", "score":0.4777363194 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"th", @@ -11717,6 +13145,13 @@ "metric":"chrf", "score":0.285966574 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tr", @@ -11766,6 +13201,13 @@ "metric":"chrf", "score":0.6048347469 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uk", @@ -11850,6 +13292,13 @@ "metric":"chrf", "score":0.2949770962 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ur", @@ -11899,6 +13348,13 @@ "metric":"chrf", "score":0.4836035417 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uz", @@ -11948,6 +13404,13 @@ "metric":"chrf", "score":0.614390118 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"vi", @@ -12172,6 +13635,20 @@ "metric":"chrf", "score":0.2943453041 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yue", @@ -12298,6 +13775,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zu", @@ -12326,6 +13810,13 @@ "metric":"chrf", "score":0.5825497566 }, + { + "model":"anthropic\/claude-3.7-sonnet", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"aeb", @@ -12396,6 +13887,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ak", @@ -12410,6 +13908,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ak", @@ -12494,6 +13999,13 @@ "metric":"chrf", "score":0.3948537197 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"apc", @@ -12529,6 +14041,13 @@ "metric":"chrf", "score":0.5400496227 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ar", @@ -12648,6 +14167,13 @@ "metric":"chrf", "score":0.4672104169 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"as", @@ -12697,6 +14223,13 @@ "metric":"chrf", "score":0.3710433705 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"awa", @@ -12746,6 +14279,13 @@ "metric":"chrf", "score":0.4016844833 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"az", @@ -12837,6 +14377,13 @@ "metric":"chrf", "score":0.4495742511 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bho", @@ -12921,6 +14468,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bn", @@ -13005,6 +14559,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ceb", @@ -13096,6 +14657,13 @@ "metric":"chrf", "score":0.5093826491 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"cs", @@ -13110,6 +14678,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"cs", @@ -13138,6 +14713,13 @@ "metric":"chrf", "score":0.6148435167 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"de", @@ -13187,6 +14769,13 @@ "metric":"chrf", "score":0.7126873721 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"el", @@ -13201,6 +14790,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"el", @@ -13285,6 +14881,13 @@ "metric":"chrf", "score":0.8046234958 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"es", @@ -13341,6 +14944,13 @@ "metric":"chrf", "score":0.619328646 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fa", @@ -13432,6 +15042,13 @@ "metric":"chrf", "score":0.6240074261 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fr", @@ -13516,6 +15133,13 @@ "metric":"chrf", "score":0.2487717813 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"gu", @@ -13621,6 +15245,13 @@ "metric":"chrf", "score":0.5462639917 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hi", @@ -13747,6 +15378,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hu", @@ -13761,6 +15399,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hu", @@ -13789,6 +15434,13 @@ "metric":"chrf", "score":0.6039205342 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"id", @@ -13838,6 +15490,13 @@ "metric":"chrf", "score":0.6893145815 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ig", @@ -13929,6 +15588,13 @@ "metric":"chrf", "score":0.5246932394 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"it", @@ -13978,6 +15644,13 @@ "metric":"chrf", "score":0.6223142999 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ja", @@ -14027,6 +15700,13 @@ "metric":"chrf", "score":0.4566077399 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"jv", @@ -14111,6 +15791,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kk", @@ -14125,6 +15812,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kk", @@ -14153,6 +15847,13 @@ "metric":"chrf", "score":0.5707828412 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"km", @@ -14202,6 +15903,13 @@ "metric":"chrf", "score":0.4093754295 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kn", @@ -14251,6 +15959,13 @@ "metric":"chrf", "score":0.5201018846 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ko", @@ -14370,6 +16085,13 @@ "metric":"chrf", "score":0.5356815834 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mai", @@ -14419,6 +16141,13 @@ "metric":"chrf", "score":0.5195593791 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mg", @@ -14468,6 +16197,13 @@ "metric":"chrf", "score":0.5546741997 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ml", @@ -14517,6 +16253,13 @@ "metric":"chrf", "score":0.5540996255 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mr", @@ -14566,6 +16309,13 @@ "metric":"chrf", "score":0.4820888027 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ms", @@ -14615,6 +16365,13 @@ "metric":"chrf", "score":0.7387347937 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"my", @@ -14664,6 +16421,13 @@ "metric":"chrf", "score":0.42603146 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ne", @@ -14713,6 +16477,13 @@ "metric":"chrf", "score":0.520988987 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"nl", @@ -14762,6 +16533,13 @@ "metric":"chrf", "score":0.5984996024 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ny", @@ -14811,6 +16589,13 @@ "metric":"chrf", "score":0.5195265013 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"om", @@ -14860,6 +16645,13 @@ "metric":"chrf", "score":0.3812309298 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"or", @@ -14909,6 +16701,13 @@ "metric":"chrf", "score":0.5101934539 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pa", @@ -14958,6 +16757,13 @@ "metric":"chrf", "score":0.5051708575 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pl", @@ -15007,6 +16813,13 @@ "metric":"chrf", "score":0.5693090483 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ps", @@ -15021,6 +16834,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pt", @@ -15077,6 +16897,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ro", @@ -15126,6 +16953,13 @@ "metric":"chrf", "score":0.6495822688 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ru", @@ -15217,6 +17051,13 @@ "metric":"chrf", "score":0.5565619536 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sd", @@ -15266,6 +17107,13 @@ "metric":"chrf", "score":0.4829492302 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"si", @@ -15315,6 +17163,13 @@ "metric":"chrf", "score":0.4873474492 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sn", @@ -15329,6 +17184,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sn", @@ -15357,6 +17219,13 @@ "metric":"chrf", "score":0.4950017684 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"so", @@ -15406,6 +17275,13 @@ "metric":"chrf", "score":0.4930051732 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sr", @@ -15455,6 +17331,13 @@ "metric":"chrf", "score":0.6058388421 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"su", @@ -15504,6 +17387,13 @@ "metric":"chrf", "score":0.5082942096 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sv", @@ -15518,6 +17408,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sv", @@ -15602,6 +17499,20 @@ "metric":"chrf", "score":0.6425140836 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ta", @@ -15651,6 +17562,13 @@ "metric":"chrf", "score":0.5878963723 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"te", @@ -15735,6 +17653,13 @@ "metric":"chrf", "score":0.4640276677 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"th", @@ -15826,6 +17751,13 @@ "metric":"chrf", "score":0.2283013271 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tr", @@ -15875,6 +17807,13 @@ "metric":"chrf", "score":0.5873812009 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uk", @@ -15959,6 +17898,13 @@ "metric":"chrf", "score":0.2411879984 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ur", @@ -16008,6 +17954,13 @@ "metric":"chrf", "score":0.4958567702 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uz", @@ -16057,6 +18010,13 @@ "metric":"chrf", "score":0.5892994562 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"vi", @@ -16281,6 +18241,20 @@ "metric":"chrf", "score":0.2450420475 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yue", @@ -16407,6 +18381,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zu", @@ -16435,6 +18416,13 @@ "metric":"chrf", "score":0.5526475142 }, + { + "model":"anthropic\/claude-sonnet-4", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"aeb", @@ -16505,6 +18493,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ak", @@ -16519,6 +18514,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ak", @@ -16603,6 +18605,13 @@ "metric":"chrf", "score":0.2662662886 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"apc", @@ -16638,6 +18647,13 @@ "metric":"chrf", "score":0.5264317244 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ar", @@ -16757,6 +18773,13 @@ "metric":"chrf", "score":0.4448745325 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"as", @@ -16806,6 +18829,13 @@ "metric":"chrf", "score":0.3840415666 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"awa", @@ -16855,6 +18885,13 @@ "metric":"chrf", "score":0.3746553107 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"az", @@ -16946,6 +18983,13 @@ "metric":"chrf", "score":0.4689651175 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bho", @@ -17030,6 +19074,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bn", @@ -17114,6 +19165,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ceb", @@ -17205,6 +19263,13 @@ "metric":"chrf", "score":0.4341433104 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"cs", @@ -17219,6 +19284,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"cs", @@ -17247,6 +19319,13 @@ "metric":"chrf", "score":0.5928360984 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"de", @@ -17296,6 +19375,13 @@ "metric":"chrf", "score":0.6644677733 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"el", @@ -17310,6 +19396,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"el", @@ -17394,6 +19487,13 @@ "metric":"chrf", "score":0.8170495194 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"es", @@ -17450,6 +19550,13 @@ "metric":"chrf", "score":0.5862734644 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fa", @@ -17541,6 +19648,13 @@ "metric":"chrf", "score":0.5926958618 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fr", @@ -17625,6 +19739,13 @@ "metric":"chrf", "score":0.2225585574 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"gu", @@ -17730,6 +19851,13 @@ "metric":"chrf", "score":0.5393383261 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hi", @@ -17856,6 +19984,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hu", @@ -17870,6 +20005,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hu", @@ -17898,6 +20040,13 @@ "metric":"chrf", "score":0.583941298 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"id", @@ -17947,6 +20096,13 @@ "metric":"chrf", "score":0.6305869448 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ig", @@ -18038,6 +20194,13 @@ "metric":"chrf", "score":0.4706209345 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"it", @@ -18087,6 +20250,13 @@ "metric":"chrf", "score":0.6095914494 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ja", @@ -18136,6 +20306,13 @@ "metric":"chrf", "score":0.5197322727 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"jv", @@ -18220,6 +20397,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kk", @@ -18234,6 +20418,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kk", @@ -18262,6 +20453,13 @@ "metric":"chrf", "score":0.5039209227 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"km", @@ -18311,6 +20509,13 @@ "metric":"chrf", "score":0.3542419226 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kn", @@ -18360,6 +20565,13 @@ "metric":"chrf", "score":0.4793446685 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ko", @@ -18479,6 +20691,13 @@ "metric":"chrf", "score":0.489819774 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mai", @@ -18528,6 +20747,13 @@ "metric":"chrf", "score":0.4654775647 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mg", @@ -18577,6 +20803,13 @@ "metric":"chrf", "score":0.4912322205 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ml", @@ -18626,6 +20859,13 @@ "metric":"chrf", "score":0.5507453743 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mr", @@ -18675,6 +20915,13 @@ "metric":"chrf", "score":0.4918283752 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ms", @@ -18724,6 +20971,13 @@ "metric":"chrf", "score":0.6540376697 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"my", @@ -18773,6 +21027,13 @@ "metric":"chrf", "score":0.5221658577 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ne", @@ -18822,6 +21083,13 @@ "metric":"chrf", "score":0.4946334042 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"nl", @@ -18871,6 +21139,13 @@ "metric":"chrf", "score":0.5934736192 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ny", @@ -18920,6 +21195,13 @@ "metric":"chrf", "score":0.42437101 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"om", @@ -18969,6 +21251,13 @@ "metric":"chrf", "score":0.3274101513 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"or", @@ -19018,6 +21307,13 @@ "metric":"chrf", "score":0.4329741844 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pa", @@ -19067,6 +21363,13 @@ "metric":"chrf", "score":0.5490387026 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pl", @@ -19116,6 +21419,13 @@ "metric":"chrf", "score":0.5692291394 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ps", @@ -19130,6 +21440,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pt", @@ -19186,6 +21503,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ro", @@ -19235,6 +21559,13 @@ "metric":"chrf", "score":0.6282365697 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ru", @@ -19326,6 +21657,13 @@ "metric":"chrf", "score":0.4379044144 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sd", @@ -19375,6 +21713,13 @@ "metric":"chrf", "score":0.4402768379 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"si", @@ -19424,6 +21769,13 @@ "metric":"chrf", "score":0.4464397214 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sn", @@ -19438,6 +21790,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sn", @@ -19466,6 +21825,13 @@ "metric":"chrf", "score":0.4670539114 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"so", @@ -19515,6 +21881,13 @@ "metric":"chrf", "score":0.4541385931 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sr", @@ -19564,6 +21937,13 @@ "metric":"chrf", "score":0.5962190033 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"su", @@ -19613,6 +21993,13 @@ "metric":"chrf", "score":0.5213195361 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sv", @@ -19627,6 +22014,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sv", @@ -19711,6 +22105,20 @@ "metric":"chrf", "score":0.634706105 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ta", @@ -19760,6 +22168,13 @@ "metric":"chrf", "score":0.5361849509 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"te", @@ -19844,6 +22259,13 @@ "metric":"chrf", "score":0.4354012087 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"th", @@ -19935,6 +22357,13 @@ "metric":"chrf", "score":0.2139568479 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tr", @@ -19984,6 +22413,13 @@ "metric":"chrf", "score":0.5886644893 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uk", @@ -20068,6 +22504,13 @@ "metric":"chrf", "score":0.2015864716 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ur", @@ -20117,6 +22560,13 @@ "metric":"chrf", "score":0.4761547661 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uz", @@ -20166,6 +22616,13 @@ "metric":"chrf", "score":0.4902380763 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"vi", @@ -20390,6 +22847,20 @@ "metric":"chrf", "score":0.2484365945 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yue", @@ -20516,6 +22987,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zu", @@ -20544,6 +23022,13 @@ "metric":"chrf", "score":0.5416847655 }, + { + "model":"deepseek\/deepseek-chat", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"aeb", @@ -20614,6 +23099,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ak", @@ -20628,6 +23120,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ak", @@ -20712,6 +23211,13 @@ "metric":"chrf", "score":0.2849440478 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"apc", @@ -20747,6 +23253,13 @@ "metric":"chrf", "score":0.5144112629 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", @@ -20866,6 +23379,13 @@ "metric":"chrf", "score":0.4676592617 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"as", @@ -20915,6 +23435,13 @@ "metric":"chrf", "score":0.3821492664 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"awa", @@ -20964,6 +23491,13 @@ "metric":"chrf", "score":0.3834555839 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"az", @@ -21055,6 +23589,13 @@ "metric":"chrf", "score":0.4756055948 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bho", @@ -21139,6 +23680,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", @@ -21223,6 +23771,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ceb", @@ -21314,6 +23869,13 @@ "metric":"chrf", "score":0.4189694789 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"cs", @@ -21328,6 +23890,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"cs", @@ -21356,6 +23925,13 @@ "metric":"chrf", "score":0.6187707189 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", @@ -21405,6 +23981,13 @@ "metric":"chrf", "score":0.6601015066 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"el", @@ -21419,6 +24002,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"el", @@ -21503,6 +24093,13 @@ "metric":"chrf", "score":0.8278285651 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", @@ -21559,6 +24156,13 @@ "metric":"chrf", "score":0.5918382188 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fa", @@ -21650,6 +24254,13 @@ "metric":"chrf", "score":0.5672691361 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", @@ -21734,6 +24345,13 @@ "metric":"chrf", "score":0.2168141904 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"gu", @@ -21839,6 +24457,13 @@ "metric":"chrf", "score":0.5200026897 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", @@ -21965,6 +24590,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hu", @@ -21979,6 +24611,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hu", @@ -22007,6 +24646,13 @@ "metric":"chrf", "score":0.556064896 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", @@ -22056,6 +24702,13 @@ "metric":"chrf", "score":0.625735911 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ig", @@ -22147,6 +24800,13 @@ "metric":"chrf", "score":0.4538812051 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"it", @@ -22196,6 +24856,13 @@ "metric":"chrf", "score":0.5973973733 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", @@ -22245,6 +24912,13 @@ "metric":"chrf", "score":0.4709407515 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"jv", @@ -22329,6 +25003,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kk", @@ -22343,6 +25024,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kk", @@ -22371,6 +25059,13 @@ "metric":"chrf", "score":0.4676749835 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"km", @@ -22420,6 +25115,13 @@ "metric":"chrf", "score":0.3265400527 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kn", @@ -22469,6 +25171,13 @@ "metric":"chrf", "score":0.4781553813 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ko", @@ -22588,6 +25297,13 @@ "metric":"chrf", "score":0.491879277 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mai", @@ -22637,6 +25353,13 @@ "metric":"chrf", "score":0.4613197046 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mg", @@ -22686,6 +25409,13 @@ "metric":"chrf", "score":0.4588308902 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ml", @@ -22735,6 +25465,13 @@ "metric":"chrf", "score":0.5094572017 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", @@ -22784,6 +25521,13 @@ "metric":"chrf", "score":0.4748109447 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ms", @@ -22833,6 +25577,13 @@ "metric":"chrf", "score":0.6535736283 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"my", @@ -22882,6 +25633,13 @@ "metric":"chrf", "score":0.504994716 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ne", @@ -22931,6 +25689,13 @@ "metric":"chrf", "score":0.5052232921 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"nl", @@ -22980,6 +25745,13 @@ "metric":"chrf", "score":0.5869217143 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ny", @@ -23029,6 +25801,13 @@ "metric":"chrf", "score":0.4155041047 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.1 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"om", @@ -23078,6 +25857,13 @@ "metric":"chrf", "score":0.3050512265 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"or", @@ -23127,6 +25913,13 @@ "metric":"chrf", "score":0.4422888692 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", @@ -23176,6 +25969,13 @@ "metric":"chrf", "score":0.5492435889 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pl", @@ -23225,6 +26025,13 @@ "metric":"chrf", "score":0.540420297 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ps", @@ -23239,6 +26046,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", @@ -23295,6 +26109,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ro", @@ -23344,6 +26165,13 @@ "metric":"chrf", "score":0.6072982987 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", @@ -23435,6 +26263,13 @@ "metric":"chrf", "score":0.3952274191 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sd", @@ -23484,6 +26319,13 @@ "metric":"chrf", "score":0.4669380076 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"si", @@ -23533,6 +26375,13 @@ "metric":"chrf", "score":0.3947833 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sn", @@ -23547,6 +26396,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sn", @@ -23575,6 +26431,13 @@ "metric":"chrf", "score":0.4480995236 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"so", @@ -23624,6 +26487,13 @@ "metric":"chrf", "score":0.4118027966 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sr", @@ -23673,6 +26543,13 @@ "metric":"chrf", "score":0.6058201233 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"su", @@ -23722,6 +26599,13 @@ "metric":"chrf", "score":0.5097240815 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sv", @@ -23736,6 +26620,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sv", @@ -23820,6 +26711,20 @@ "metric":"chrf", "score":0.6249321785 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ta", @@ -23869,6 +26774,13 @@ "metric":"chrf", "score":0.5596704495 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", @@ -23953,6 +26865,13 @@ "metric":"chrf", "score":0.4392843531 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"th", @@ -24044,6 +26963,13 @@ "metric":"chrf", "score":0.210084949 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tr", @@ -24093,6 +27019,13 @@ "metric":"chrf", "score":0.6304381337 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uk", @@ -24177,6 +27110,13 @@ "metric":"chrf", "score":0.1399251318 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", @@ -24226,6 +27166,13 @@ "metric":"chrf", "score":0.4921734247 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uz", @@ -24275,6 +27222,13 @@ "metric":"chrf", "score":0.4705479648 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"vi", @@ -24499,6 +27453,20 @@ "metric":"chrf", "score":0.2529276987 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yue", @@ -24625,6 +27593,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zu", @@ -24653,6 +27628,13 @@ "metric":"chrf", "score":0.5463123746 }, + { + "model":"deepseek\/deepseek-chat-v3-0324", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.3 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"aeb", @@ -24723,6 +27705,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ak", @@ -24737,6 +27726,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ak", @@ -24821,6 +27817,13 @@ "metric":"chrf", "score":0.1225273024 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"apc", @@ -24856,6 +27859,13 @@ "metric":"chrf", "score":0.3155908724 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ar", @@ -24975,6 +27985,13 @@ "metric":"chrf", "score":0.434347868 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"as", @@ -25024,6 +28041,13 @@ "metric":"chrf", "score":0.1667611675 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"awa", @@ -25073,6 +28097,13 @@ "metric":"chrf", "score":0.2591570919 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"az", @@ -25164,6 +28195,13 @@ "metric":"chrf", "score":0.3851910422 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bho", @@ -25248,6 +28286,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bn", @@ -25332,6 +28377,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ceb", @@ -25423,6 +28475,13 @@ "metric":"chrf", "score":0.3184438517 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"cs", @@ -25437,6 +28496,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"cs", @@ -25465,6 +28531,13 @@ "metric":"chrf", "score":0.3746311154 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"de", @@ -25514,6 +28587,13 @@ "metric":"chrf", "score":0.3874723625 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"el", @@ -25528,6 +28608,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"el", @@ -25612,6 +28699,13 @@ "metric":"chrf", "score":0.7574063883 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"es", @@ -25668,6 +28762,13 @@ "metric":"chrf", "score":0.426538099 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fa", @@ -25759,6 +28860,13 @@ "metric":"chrf", "score":0.4510886635 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fr", @@ -25843,6 +28951,13 @@ "metric":"chrf", "score":0.1847185113 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"gu", @@ -25948,6 +29063,13 @@ "metric":"chrf", "score":0.4977126554 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hi", @@ -26074,6 +29196,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hu", @@ -26088,6 +29217,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hu", @@ -26116,6 +29252,13 @@ "metric":"chrf", "score":0.4714945694 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"id", @@ -26165,6 +29308,13 @@ "metric":"chrf", "score":0.6398491182 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ig", @@ -26256,6 +29406,13 @@ "metric":"chrf", "score":0.3836219075 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"it", @@ -26305,6 +29462,13 @@ "metric":"chrf", "score":0.4908463656 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ja", @@ -26354,6 +29518,13 @@ "metric":"chrf", "score":0.3589526769 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"jv", @@ -26438,6 +29609,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kk", @@ -26452,6 +29630,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kk", @@ -26480,6 +29665,13 @@ "metric":"chrf", "score":0.4847447773 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"km", @@ -26529,6 +29721,13 @@ "metric":"chrf", "score":0.1415530353 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kn", @@ -26578,6 +29777,13 @@ "metric":"chrf", "score":0.286449259 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ko", @@ -26697,6 +29903,13 @@ "metric":"chrf", "score":0.3335661802 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mai", @@ -26746,6 +29959,13 @@ "metric":"chrf", "score":0.2564650613 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mg", @@ -26795,6 +30015,13 @@ "metric":"chrf", "score":0.5001663831 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ml", @@ -26844,6 +30071,13 @@ "metric":"chrf", "score":0.2513871995 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mr", @@ -26893,6 +30127,13 @@ "metric":"chrf", "score":0.4314516197 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ms", @@ -26942,6 +30183,13 @@ "metric":"chrf", "score":0.6802015628 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"my", @@ -26991,6 +30239,13 @@ "metric":"chrf", "score":0.1972354123 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ne", @@ -27040,6 +30295,13 @@ "metric":"chrf", "score":0.4021301132 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"nl", @@ -27089,6 +30351,13 @@ "metric":"chrf", "score":0.5588876314 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ny", @@ -27138,6 +30407,13 @@ "metric":"chrf", "score":0.1893859434 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"om", @@ -27187,6 +30463,13 @@ "metric":"chrf", "score":0.2172604464 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"or", @@ -27236,6 +30519,13 @@ "metric":"chrf", "score":0.2623376551 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pa", @@ -27285,6 +30575,13 @@ "metric":"chrf", "score":0.3966391033 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pl", @@ -27334,6 +30631,13 @@ "metric":"chrf", "score":0.4560149918 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ps", @@ -27348,6 +30652,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pt", @@ -27404,6 +30715,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ro", @@ -27453,6 +30771,13 @@ "metric":"chrf", "score":0.5454578721 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ru", @@ -27544,6 +30869,13 @@ "metric":"chrf", "score":0.3273785033 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sd", @@ -27593,6 +30925,13 @@ "metric":"chrf", "score":0.387179761 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"si", @@ -27642,6 +30981,13 @@ "metric":"chrf", "score":0.2676221295 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sn", @@ -27656,6 +31002,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sn", @@ -27684,6 +31037,13 @@ "metric":"chrf", "score":0.4606246254 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"so", @@ -27733,6 +31093,13 @@ "metric":"chrf", "score":0.3790107218 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sr", @@ -27782,6 +31149,13 @@ "metric":"chrf", "score":0.5967801454 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"su", @@ -27831,6 +31205,13 @@ "metric":"chrf", "score":0.5298340938 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sv", @@ -27845,6 +31226,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sv", @@ -27929,6 +31317,20 @@ "metric":"chrf", "score":0.5950962977 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ta", @@ -27978,6 +31380,13 @@ "metric":"chrf", "score":0.3984988334 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"te", @@ -28062,6 +31471,13 @@ "metric":"chrf", "score":0.4677894227 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"th", @@ -28153,6 +31569,13 @@ "metric":"chrf", "score":0.0656655661 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tr", @@ -28202,6 +31625,13 @@ "metric":"chrf", "score":0.5317972494 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uk", @@ -28286,6 +31716,13 @@ "metric":"chrf", "score":0.1924672099 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ur", @@ -28335,6 +31772,13 @@ "metric":"chrf", "score":0.4241396601 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uz", @@ -28384,6 +31828,13 @@ "metric":"chrf", "score":0.3946264183 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"vi", @@ -28608,6 +32059,20 @@ "metric":"chrf", "score":0.1387297621 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yue", @@ -28734,6 +32199,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zu", @@ -28762,6 +32234,13 @@ "metric":"chrf", "score":0.2713939288 }, + { + "model":"deepseek\/deepseek-r1", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"aeb", @@ -28895,6 +32374,13 @@ "metric":"chrf", "score":0.2588501418 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"apc", @@ -28930,6 +32416,13 @@ "metric":"chrf", "score":0.3358115304 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ar", @@ -29049,6 +32542,13 @@ "metric":"chrf", "score":0.4437585001 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"as", @@ -29098,6 +32598,13 @@ "metric":"chrf", "score":0.3174359519 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"awa", @@ -29147,6 +32654,13 @@ "metric":"chrf", "score":0.171412569 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"az", @@ -29238,6 +32752,13 @@ "metric":"chrf", "score":0.3895554099 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bho", @@ -29287,6 +32808,13 @@ "metric":"chrf", "score":0.2090205571 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bn", @@ -29336,6 +32864,13 @@ "metric":"chrf", "score":0.5221618044 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ceb", @@ -29469,6 +33004,13 @@ "metric":"chrf", "score":0.555772337 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"de", @@ -29616,6 +33158,13 @@ "metric":"chrf", "score":0.814288256 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"es", @@ -29672,6 +33221,13 @@ "metric":"chrf", "score":0.5373052889 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fa", @@ -29763,6 +33319,13 @@ "metric":"chrf", "score":0.5388064333 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fr", @@ -29847,6 +33410,13 @@ "metric":"chrf", "score":0.139589465 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"gu", @@ -29952,6 +33522,13 @@ "metric":"chrf", "score":0.485021658 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hi", @@ -30085,6 +33662,13 @@ "metric":"chrf", "score":0.5714655622 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"id", @@ -30134,6 +33718,13 @@ "metric":"chrf", "score":0.5448923741 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ig", @@ -30225,6 +33816,13 @@ "metric":"chrf", "score":0.3577532211 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"it", @@ -30274,6 +33872,13 @@ "metric":"chrf", "score":0.4731076434 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ja", @@ -30323,6 +33928,13 @@ "metric":"chrf", "score":0.2105103816 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"jv", @@ -30414,6 +34026,13 @@ "metric":"chrf", "score":0.4205029389 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"km", @@ -30463,6 +34082,13 @@ "metric":"chrf", "score":0.3308191122 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kn", @@ -30512,6 +34138,13 @@ "metric":"chrf", "score":0.4368728644 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ko", @@ -30631,6 +34264,13 @@ "metric":"chrf", "score":0.1762583779 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mai", @@ -30680,6 +34320,13 @@ "metric":"chrf", "score":0.2969811617 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mg", @@ -30729,6 +34376,13 @@ "metric":"chrf", "score":0.4545492979 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ml", @@ -30778,6 +34432,13 @@ "metric":"chrf", "score":0.5042437741 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mr", @@ -30827,6 +34488,13 @@ "metric":"chrf", "score":0.3246573528 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ms", @@ -30876,6 +34544,13 @@ "metric":"chrf", "score":0.6603531936 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"my", @@ -30925,6 +34600,13 @@ "metric":"chrf", "score":0.5016732556 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ne", @@ -30974,6 +34656,13 @@ "metric":"chrf", "score":0.397363268 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"nl", @@ -31023,6 +34712,13 @@ "metric":"chrf", "score":0.4586860241 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ny", @@ -31072,6 +34768,13 @@ "metric":"chrf", "score":0.2801240967 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"om", @@ -31121,6 +34824,13 @@ "metric":"chrf", "score":0.2965714462 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"or", @@ -31170,6 +34880,13 @@ "metric":"chrf", "score":0.3831192143 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pa", @@ -31219,6 +34936,13 @@ "metric":"chrf", "score":0.5329271965 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pl", @@ -31268,6 +34992,13 @@ "metric":"chrf", "score":0.5528035231 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ps", @@ -31282,6 +35013,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pt", @@ -31338,6 +35076,13 @@ "metric":"accuracy", "score":0.3 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ro", @@ -31387,6 +35132,13 @@ "metric":"chrf", "score":0.6249092429 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ru", @@ -31478,6 +35230,13 @@ "metric":"chrf", "score":0.3946845887 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sd", @@ -31527,6 +35286,13 @@ "metric":"chrf", "score":0.3420045131 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"si", @@ -31618,6 +35384,13 @@ "metric":"chrf", "score":0.3757270357 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"so", @@ -31667,6 +35440,13 @@ "metric":"chrf", "score":0.4221024153 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sr", @@ -31716,6 +35496,13 @@ "metric":"chrf", "score":0.5621225861 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"su", @@ -31863,6 +35650,20 @@ "metric":"chrf", "score":0.6201778863 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ta", @@ -31912,6 +35713,13 @@ "metric":"chrf", "score":0.4920579152 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"te", @@ -31961,6 +35769,13 @@ "metric":"chrf", "score":0.5156714123 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"th", @@ -32052,6 +35867,13 @@ "metric":"chrf", "score":0.1488881792 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"tr", @@ -32101,6 +35923,13 @@ "metric":"chrf", "score":0.6019992774 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uk", @@ -32150,6 +35979,13 @@ "metric":"chrf", "score":0.5585953363 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ur", @@ -32199,6 +36035,13 @@ "metric":"chrf", "score":0.4722890493 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uz", @@ -32248,6 +36091,13 @@ "metric":"chrf", "score":0.5351905044 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"vi", @@ -32472,6 +36322,20 @@ "metric":"chrf", "score":0.2476630291 }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"deepseek\/deepseek-r1-0528", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yue", @@ -32696,6 +36560,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ak", @@ -32710,6 +36581,13 @@ "metric":"accuracy", "score":0.4 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ak", @@ -32794,6 +36672,13 @@ "metric":"chrf", "score":0.3608782934 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"apc", @@ -32829,6 +36714,13 @@ "metric":"chrf", "score":0.5279437548 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", @@ -32948,6 +36840,13 @@ "metric":"chrf", "score":0.4912165901 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"as", @@ -32997,6 +36896,13 @@ "metric":"chrf", "score":0.4568625878 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"awa", @@ -33046,6 +36952,13 @@ "metric":"chrf", "score":0.4281994918 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"az", @@ -33137,6 +37050,13 @@ "metric":"chrf", "score":0.4657078066 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bho", @@ -33221,6 +37141,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", @@ -33305,6 +37232,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ceb", @@ -33396,6 +37330,13 @@ "metric":"chrf", "score":0.5760217609 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"cs", @@ -33410,6 +37351,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"cs", @@ -33438,6 +37386,13 @@ "metric":"chrf", "score":0.5939433432 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", @@ -33487,6 +37442,13 @@ "metric":"chrf", "score":0.6856946146 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"el", @@ -33501,6 +37463,13 @@ "metric":"accuracy", "score":1.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"el", @@ -33585,6 +37554,13 @@ "metric":"chrf", "score":0.8279616884 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", @@ -33641,6 +37617,13 @@ "metric":"chrf", "score":0.6166676981 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fa", @@ -33732,6 +37715,13 @@ "metric":"chrf", "score":0.6176510545 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", @@ -33816,6 +37806,13 @@ "metric":"chrf", "score":0.2330289804 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"gu", @@ -33921,6 +37918,13 @@ "metric":"chrf", "score":0.5455037681 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", @@ -34047,6 +38051,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hu", @@ -34061,6 +38072,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hu", @@ -34089,6 +38107,13 @@ "metric":"chrf", "score":0.644268597 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", @@ -34138,6 +38163,13 @@ "metric":"chrf", "score":0.6964617832 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ig", @@ -34229,6 +38261,13 @@ "metric":"chrf", "score":0.5503510798 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"it", @@ -34278,6 +38317,13 @@ "metric":"chrf", "score":0.5878176707 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", @@ -34327,6 +38373,13 @@ "metric":"chrf", "score":0.4913494739 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", @@ -34411,6 +38464,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kk", @@ -34425,6 +38485,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kk", @@ -34453,6 +38520,13 @@ "metric":"chrf", "score":0.5755019454 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"km", @@ -34502,6 +38576,13 @@ "metric":"chrf", "score":0.3989811117 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kn", @@ -34551,6 +38632,13 @@ "metric":"chrf", "score":0.5684194735 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ko", @@ -34670,6 +38758,13 @@ "metric":"chrf", "score":0.5121389124 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mai", @@ -34719,6 +38814,13 @@ "metric":"chrf", "score":0.5016256548 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mg", @@ -34768,6 +38870,13 @@ "metric":"chrf", "score":0.5248165256 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ml", @@ -34817,6 +38926,13 @@ "metric":"chrf", "score":0.5530820193 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", @@ -34866,6 +38982,13 @@ "metric":"chrf", "score":0.4516145469 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ms", @@ -34915,6 +39038,13 @@ "metric":"chrf", "score":0.742377276 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"my", @@ -34964,6 +39094,13 @@ "metric":"chrf", "score":0.5121166935 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ne", @@ -35013,6 +39150,13 @@ "metric":"chrf", "score":0.5019977224 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"nl", @@ -35062,6 +39206,13 @@ "metric":"chrf", "score":0.6593260342 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ny", @@ -35111,6 +39262,13 @@ "metric":"chrf", "score":0.5553049856 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"om", @@ -35160,6 +39318,13 @@ "metric":"chrf", "score":0.4093787348 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"or", @@ -35209,6 +39374,13 @@ "metric":"chrf", "score":0.5108902329 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", @@ -35258,6 +39430,13 @@ "metric":"chrf", "score":0.5931595705 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pl", @@ -35307,6 +39486,13 @@ "metric":"chrf", "score":0.579529149 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ps", @@ -35321,6 +39507,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", @@ -35377,6 +39570,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ro", @@ -35426,6 +39626,13 @@ "metric":"chrf", "score":0.7127688163 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", @@ -35517,6 +39724,13 @@ "metric":"chrf", "score":0.5861214096 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sd", @@ -35566,6 +39780,13 @@ "metric":"chrf", "score":0.5390152372 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"si", @@ -35615,6 +39836,13 @@ "metric":"chrf", "score":0.4979151965 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sn", @@ -35629,6 +39857,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sn", @@ -35657,6 +39892,13 @@ "metric":"chrf", "score":0.5463949389 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"so", @@ -35706,6 +39948,13 @@ "metric":"chrf", "score":0.5048544071 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sr", @@ -35755,6 +40004,13 @@ "metric":"chrf", "score":0.6115459213 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"su", @@ -35804,6 +40060,13 @@ "metric":"chrf", "score":0.5175978358 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sv", @@ -35818,6 +40081,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sv", @@ -35902,6 +40172,20 @@ "metric":"chrf", "score":0.7212747243 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ta", @@ -35951,6 +40235,13 @@ "metric":"chrf", "score":0.5712604905 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", @@ -36035,6 +40326,13 @@ "metric":"chrf", "score":0.5146227404 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"th", @@ -36126,6 +40424,13 @@ "metric":"chrf", "score":0.2278398804 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tr", @@ -36175,6 +40480,13 @@ "metric":"chrf", "score":0.6455051222 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uk", @@ -36259,6 +40571,13 @@ "metric":"chrf", "score":0.296360163 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", @@ -36308,6 +40627,13 @@ "metric":"chrf", "score":0.4826415387 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uz", @@ -36357,6 +40683,13 @@ "metric":"chrf", "score":0.6242374823 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", @@ -36581,6 +40914,20 @@ "metric":"chrf", "score":0.349083777 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yue", @@ -36707,6 +41054,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zu", @@ -36735,6 +41089,13 @@ "metric":"chrf", "score":0.5663641807 }, + { + "model":"google\/gemini-2.0-flash-001", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"aeb", @@ -36805,6 +41166,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ak", @@ -36819,6 +41187,13 @@ "metric":"accuracy", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ak", @@ -36903,6 +41278,13 @@ "metric":"chrf", "score":0.3131355766 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"apc", @@ -36938,6 +41320,13 @@ "metric":"chrf", "score":0.4535351144 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", @@ -37057,6 +41446,13 @@ "metric":"chrf", "score":0.454646328 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"as", @@ -37106,6 +41502,13 @@ "metric":"chrf", "score":0.4113322824 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"awa", @@ -37155,6 +41558,13 @@ "metric":"chrf", "score":0.4162476616 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"az", @@ -37246,6 +41656,13 @@ "metric":"chrf", "score":0.4876970107 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bho", @@ -37330,6 +41747,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", @@ -37414,6 +41838,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ceb", @@ -37505,6 +41936,13 @@ "metric":"chrf", "score":0.508602676 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"cs", @@ -37519,6 +41957,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"cs", @@ -37547,6 +41992,13 @@ "metric":"chrf", "score":0.56756333 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", @@ -37596,6 +42048,13 @@ "metric":"chrf", "score":0.7125417889 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"el", @@ -37610,6 +42069,13 @@ "metric":"accuracy", "score":0.6 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"el", @@ -37694,6 +42160,13 @@ "metric":"chrf", "score":0.7613397345 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", @@ -37750,6 +42223,13 @@ "metric":"chrf", "score":0.6061019948 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fa", @@ -37841,6 +42321,13 @@ "metric":"chrf", "score":0.612989238 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", @@ -37925,6 +42412,13 @@ "metric":"chrf", "score":0.1890051706 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"gu", @@ -38030,6 +42524,13 @@ "metric":"chrf", "score":0.5013466224 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", @@ -38156,6 +42657,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hu", @@ -38170,6 +42678,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hu", @@ -38198,6 +42713,13 @@ "metric":"chrf", "score":0.56675535 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", @@ -38247,6 +42769,13 @@ "metric":"chrf", "score":0.6624914478 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ig", @@ -38338,6 +42867,13 @@ "metric":"chrf", "score":0.5179791668 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"it", @@ -38387,6 +42923,13 @@ "metric":"chrf", "score":0.6004337743 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", @@ -38436,6 +42979,13 @@ "metric":"chrf", "score":0.3787664659 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", @@ -38520,6 +43070,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kk", @@ -38534,6 +43091,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kk", @@ -38562,6 +43126,13 @@ "metric":"chrf", "score":0.6167229896 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"km", @@ -38611,6 +43182,13 @@ "metric":"chrf", "score":0.4461032467 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kn", @@ -38660,6 +43238,13 @@ "metric":"chrf", "score":0.5550425425 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ko", @@ -38779,6 +43364,13 @@ "metric":"chrf", "score":0.510712197 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mai", @@ -38828,6 +43420,13 @@ "metric":"chrf", "score":0.498918765 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mg", @@ -38877,6 +43476,13 @@ "metric":"chrf", "score":0.4941543502 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ml", @@ -38926,6 +43532,13 @@ "metric":"chrf", "score":0.5415851472 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", @@ -38975,6 +43588,13 @@ "metric":"chrf", "score":0.4736891146 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ms", @@ -39024,6 +43644,13 @@ "metric":"chrf", "score":0.7014575648 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"my", @@ -39073,6 +43700,13 @@ "metric":"chrf", "score":0.5135188138 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ne", @@ -39122,6 +43756,13 @@ "metric":"chrf", "score":0.5006126727 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"nl", @@ -39171,6 +43812,13 @@ "metric":"chrf", "score":0.5984422632 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ny", @@ -39220,6 +43868,13 @@ "metric":"chrf", "score":0.501423047 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"om", @@ -39269,6 +43924,13 @@ "metric":"chrf", "score":0.379164388 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"or", @@ -39318,6 +43980,13 @@ "metric":"chrf", "score":0.5063263205 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", @@ -39367,6 +44036,13 @@ "metric":"chrf", "score":0.583867208 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pl", @@ -39416,6 +44092,13 @@ "metric":"chrf", "score":0.5847615168 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ps", @@ -39430,6 +44113,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", @@ -39486,6 +44176,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ro", @@ -39535,6 +44232,13 @@ "metric":"chrf", "score":0.6619427768 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", @@ -39626,6 +44330,13 @@ "metric":"chrf", "score":0.5626056331 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sd", @@ -39675,6 +44386,13 @@ "metric":"chrf", "score":0.48412 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"si", @@ -39724,6 +44442,13 @@ "metric":"chrf", "score":0.4749986301 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sn", @@ -39738,6 +44463,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sn", @@ -39766,6 +44498,13 @@ "metric":"chrf", "score":0.4635739463 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"so", @@ -39815,6 +44554,13 @@ "metric":"chrf", "score":0.5118672399 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sr", @@ -39864,6 +44610,13 @@ "metric":"chrf", "score":0.6215234533 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"su", @@ -39913,6 +44666,13 @@ "metric":"chrf", "score":0.5272097328 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sv", @@ -39927,6 +44687,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sv", @@ -40011,6 +44778,20 @@ "metric":"chrf", "score":0.6853284539 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ta", @@ -40060,6 +44841,13 @@ "metric":"chrf", "score":0.5683000198 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", @@ -40144,6 +44932,13 @@ "metric":"chrf", "score":0.4700484002 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"th", @@ -40235,6 +45030,13 @@ "metric":"chrf", "score":0.2594660098 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tr", @@ -40284,6 +45086,13 @@ "metric":"chrf", "score":0.6289997941 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uk", @@ -40368,6 +45177,13 @@ "metric":"chrf", "score":0.1529779827 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", @@ -40417,6 +45233,13 @@ "metric":"chrf", "score":0.493353195 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uz", @@ -40466,6 +45289,13 @@ "metric":"chrf", "score":0.5891323556 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", @@ -40690,6 +45520,20 @@ "metric":"chrf", "score":0.2926500144 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yue", @@ -40816,6 +45660,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zu", @@ -40844,6 +45695,13 @@ "metric":"chrf", "score":0.567169258 }, + { + "model":"google\/gemini-2.0-flash-lite-001", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"aeb", @@ -40879,6 +45737,13 @@ "metric":"chrf", "score":0.4610162591 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ak", @@ -40893,6 +45758,13 @@ "metric":"accuracy", "score":0.5 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ak", @@ -40977,6 +45849,13 @@ "metric":"chrf", "score":0.3587997566 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"apc", @@ -41012,6 +45891,13 @@ "metric":"chrf", "score":0.6118752881 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ar", @@ -41131,6 +46017,13 @@ "metric":"chrf", "score":0.4721454199 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"as", @@ -41180,6 +46073,13 @@ "metric":"chrf", "score":0.4358785934 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"awa", @@ -41229,6 +46129,13 @@ "metric":"chrf", "score":0.3995221223 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"az", @@ -41320,6 +46227,13 @@ "metric":"chrf", "score":0.5250650323 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bho", @@ -41369,6 +46283,13 @@ "metric":"chrf", "score":0.4554148161 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bn", @@ -41418,6 +46339,13 @@ "metric":"chrf", "score":0.5492979392 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ceb", @@ -41509,6 +46437,13 @@ "metric":"chrf", "score":0.5996552124 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"cs", @@ -41523,6 +46458,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"cs", @@ -41551,6 +46493,13 @@ "metric":"chrf", "score":0.5948875971 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"de", @@ -41600,6 +46549,13 @@ "metric":"chrf", "score":0.6896498523 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"el", @@ -41614,6 +46570,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"el", @@ -41698,6 +46661,13 @@ "metric":"chrf", "score":0.0 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"es", @@ -41754,6 +46724,13 @@ "metric":"chrf", "score":0.6257285571 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fa", @@ -41845,6 +46822,13 @@ "metric":"chrf", "score":0.6404359092 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fr", @@ -41929,6 +46913,13 @@ "metric":"chrf", "score":0.2599881115 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"gu", @@ -42034,6 +47025,13 @@ "metric":"chrf", "score":0.5714009602 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hi", @@ -42125,6 +47123,13 @@ "metric":"chrf", "score":0.4261207547 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hu", @@ -42139,6 +47144,13 @@ "metric":"accuracy", "score":0.9 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hu", @@ -42167,6 +47179,13 @@ "metric":"chrf", "score":0.6720003623 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"id", @@ -42216,6 +47235,13 @@ "metric":"chrf", "score":0.7131031141 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ig", @@ -42307,6 +47333,13 @@ "metric":"chrf", "score":0.5329216971 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"it", @@ -42356,6 +47389,13 @@ "metric":"chrf", "score":0.5949572053 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ja", @@ -42405,6 +47445,13 @@ "metric":"chrf", "score":0.4700002965 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"jv", @@ -42454,6 +47501,13 @@ "metric":"chrf", "score":0.6275555619 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kk", @@ -42468,6 +47522,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kk", @@ -42496,6 +47557,13 @@ "metric":"chrf", "score":0.58504635 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"km", @@ -42545,6 +47613,13 @@ "metric":"chrf", "score":0.4617416997 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kn", @@ -42594,6 +47669,13 @@ "metric":"chrf", "score":0.5568929694 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ko", @@ -42713,6 +47795,13 @@ "metric":"chrf", "score":0.5391710538 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mai", @@ -42762,6 +47851,13 @@ "metric":"chrf", "score":0.4870892013 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mg", @@ -42811,6 +47907,13 @@ "metric":"chrf", "score":0.5379094165 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ml", @@ -42860,6 +47963,13 @@ "metric":"chrf", "score":0.5389145892 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mr", @@ -42909,6 +48019,13 @@ "metric":"chrf", "score":0.4665758709 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ms", @@ -42958,6 +48075,13 @@ "metric":"chrf", "score":0.7284733826 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"my", @@ -43007,6 +48131,13 @@ "metric":"chrf", "score":0.544805929 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ne", @@ -43056,6 +48187,13 @@ "metric":"chrf", "score":0.5180107937 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"nl", @@ -43105,6 +48243,13 @@ "metric":"chrf", "score":0.6232733213 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ny", @@ -43154,6 +48299,13 @@ "metric":"chrf", "score":0.4585024296 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"om", @@ -43203,6 +48355,13 @@ "metric":"chrf", "score":0.4277182017 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"or", @@ -43252,6 +48411,13 @@ "metric":"chrf", "score":0.5214945108 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pa", @@ -43301,6 +48467,13 @@ "metric":"chrf", "score":0.6078708965 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pl", @@ -43350,6 +48523,13 @@ "metric":"chrf", "score":0.5993345379 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ps", @@ -43364,6 +48544,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pt", @@ -43420,6 +48607,13 @@ "metric":"accuracy", "score":0.7 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ro", @@ -43469,6 +48663,13 @@ "metric":"chrf", "score":0.7345305045 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ru", @@ -43560,6 +48761,13 @@ "metric":"chrf", "score":0.5596031593 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sd", @@ -43609,6 +48817,13 @@ "metric":"chrf", "score":0.4100893183 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"si", @@ -43658,6 +48873,13 @@ "metric":"chrf", "score":0.466344362 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sn", @@ -43672,6 +48894,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sn", @@ -43700,6 +48929,13 @@ "metric":"chrf", "score":0.4923376927 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"so", @@ -43749,6 +48985,13 @@ "metric":"chrf", "score":0.511793128 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sr", @@ -43798,6 +49041,13 @@ "metric":"chrf", "score":0.6349957477 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"su", @@ -43847,6 +49097,13 @@ "metric":"chrf", "score":0.5213243396 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sv", @@ -43861,6 +49118,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sv", @@ -43945,6 +49209,20 @@ "metric":"chrf", "score":0.7239645292 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ta", @@ -43994,6 +49272,13 @@ "metric":"chrf", "score":0.5725597295 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"te", @@ -44043,6 +49328,13 @@ "metric":"chrf", "score":0.6159040363 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"th", @@ -44134,6 +49426,13 @@ "metric":"chrf", "score":0.2587205011 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"tr", @@ -44183,6 +49482,13 @@ "metric":"chrf", "score":0.6508035663 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uk", @@ -44232,6 +49538,13 @@ "metric":"chrf", "score":0.6135073244 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ur", @@ -44281,6 +49594,13 @@ "metric":"chrf", "score":0.493749829 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uz", @@ -44330,6 +49650,13 @@ "metric":"chrf", "score":0.6083657804 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"vi", @@ -44554,6 +49881,20 @@ "metric":"chrf", "score":0.3463753843 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yue", @@ -44680,6 +50021,13 @@ "metric":"accuracy", "score":0.8 }, + { + "model":"google\/gemini-2.5-flash", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zu", @@ -44709,91689 +50057,99942 @@ "score":0.5773152323 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"aeb", - "task":"classification", + "model":"google\/gemini-2.5-flash", + "bcp_47":"zu", + "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.3214356805 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.5389152254 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.2750202358 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4582086461 + "model":"google\/gemini-2.5-pro", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.2109602349 + "score":0.0532606841 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.4227618508 + "score":0.1073712755 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0404862392 + "score":0.0366084106 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.2400991747 + "score":0.0918534276 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"am", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.2100947434 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.4549390562 + "score":0.0152635235 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.2174710639 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.3477622396 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.3252857092 + "score":0.0888307029 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.5932296006 + "score":0.2298187784 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.3272191754 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.5729420593 + "score":0.0558323892 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.2 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.340834322 + "score":0.0713257426 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5737255077 + "score":0.1072693099 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.400951924 + "score":0.1463494979 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.6055469967 + "score":0.2497593431 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.1768177745 + "score":0.0094322191 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.4206182999 + "score":0.1037916124 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1661016339 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.4188678128 + "score":0.0001965409 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.2235509309 + "score":0.0267044753 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.4602443001 + "score":0.1125538275 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.2804587312 + "score":0.0146450668 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.4871597651 + "score":0.0694240797 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.2397743064 + "score":0.0347588238 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.4543878112 + "score":0.1419212765 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.1148483938 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.3729760244 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.3589106841 + "score":0.0582570743 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.5556934974 + "score":0.1132092265 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.2194832094 + "score":0.0098853623 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.4237879497 + "score":0.0165633766 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.1 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.2436266384 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.4585679488 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.1759303823 + "score":0.0288866262 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.4132301771 + "score":0.0765030508 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"be", - "task":"classification", + "model":"google\/gemini-2.5-pro", + "bcp_47":"bho", + "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"be", - "task":"mgsm", + "model":"google\/gemini-2.5-pro", + "bcp_47":"bho", + "task":"classification", "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.2386849466 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.5137963545 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2980098252 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4897189336 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"bho", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"bho", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.2783242324 + "score":0.1414592386 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.5195378813 + "score":0.2511168982 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.2287943315 + "score":0.050165656 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.3991318481 + "score":0.059692947 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.3084224404 + "score":0.1732325986 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5121932853 + "score":0.320908965 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3583142434 + "score":0.1653880539 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.4775479891 + "score":0.2211462144 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.4286119986 + "score":0.1467739974 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.6255932052 + "score":0.1754458302 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.4113289392 + "score":0.0507024887 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.6280056275 + "score":0.1030268479 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ckb", - "task":"classification", + "model":"google\/gemini-2.5-pro", + "bcp_47":"cs", + "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ckb", - "task":"mgsm", + "model":"google\/gemini-2.5-pro", + "bcp_47":"cs", + "task":"classification", "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.3024735136 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.5519071111 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.2912283609 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.5457169481 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"cs", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"cs", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.3674320384 + "score":0.1697494021 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.6093254166 + "score":0.2799880729 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.428984445 + "score":0.2781125894 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.6342524948 + "score":0.4200022747 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3721804398 + "score":0.1216206159 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5986155742 + "score":0.2069979707 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4694707802 + "score":0.120693374 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6759134793 + "score":0.1968497699 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.3106447069 + "score":0.1189162738 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.5288637721 + "score":0.1846074997 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.3186331513 + "score":0.1307394464 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.5226733847 + "score":0.2486210965 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"en", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.3167547043 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.3513933765 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.5455939196 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"translation_to", "metric":"chrf", + "score":0.5713088936 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"es", + "task":"arc", + "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.4 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3582552616 + "score":0.0379503072 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5871323046 + "score":0.1087301231 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3814015085 + "score":0.1171367611 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6114167807 + "score":0.2647620406 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"fa", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.3270323847 + "score":0.0584383584 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.5649606293 + "score":0.1412915198 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.1269608883 + "score":0.0396603748 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.3031131479 + "score":0.0985321352 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fil", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"fil", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.4349803398 + "score":0.0985964312 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.6438510394 + "score":0.156061678 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.3272281283 + "score":0.0702762868 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.6093718119 + "score":0.1586154477 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.3355416948 + "score":0.2019995088 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5838841123 + "score":0.3236372397 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.584133096 + "score":0.3648977534 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7456335449 + "score":0.4646316658 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0679142153 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.2031626473 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0290719162 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.1889337147 + "score":0.013121921 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.3611805314 + "score":0.1524824027 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.5752776482 + "score":0.2518370758 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.2391737378 + "score":0.0406782903 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4921903256 + "score":0.0604419015 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ha", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ha", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.1621381069 + "score":0.1183279848 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.3417931875 + "score":0.1920673939 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.2717034881 + "score":0.1077307738 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.5355442254 + "score":0.1927713334 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"hi", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.204393553 + "score":0.2445177715 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.2745286065 + "score":0.3496977746 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.1816643198 + "score":0.1662382153 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.2510224378 + "score":0.1808826046 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hne", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.2859347325 + "score":0.1043372044 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.5184335371 + "score":0.1458478186 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.1342367179 + "score":0.021727044 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.4184241385 + "score":0.0580949052 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.2730596822 + "score":0.0418300745 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.5317268553 + "score":0.0806441203 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.3637494619 + "score":0.1378617741 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.6185991628 + "score":0.2350595049 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.2861788949 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5319409537 + "score":0.0504154457 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3948709187 + "score":0.1355433195 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6780004275 + "score":0.2636104799 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ig", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.3188972215 + "score":0.0732503288 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.5295590822 + "score":0.1225115139 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.2811967075 + "score":0.0583064468 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.501183428 + "score":0.1449618078 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ilo", - "task":"classification", + "model":"google\/gemini-2.5-pro", + "bcp_47":"it", + "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ilo", - "task":"mgsm", + "model":"google\/gemini-2.5-pro", + "bcp_47":"it", + "task":"classification", "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.2118796398 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4066989348 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.2037665999 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.482790382 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"it", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.2 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"it", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.2760607852 + "score":0.1541511534 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.505931568 + "score":0.2680259178 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.3710768942 + "score":0.1272530801 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.6060706251 + "score":0.2851760515 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2758724437 + "score":0.0473708874 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.530496596 + "score":0.1079640005 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.3593357374 + "score":0.0283865781 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4976294688 + "score":0.0449505586 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.315340995 + "score":0.1349286875 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.4928049131 + "score":0.2409523809 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2934453399 + "score":0.0677858867 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5856216725 + "score":0.1661259662 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.2394833815 + "score":0.1074769757 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.5242453646 + "score":0.1755162217 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.3207384881 + "score":0.1024243094 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.5956517338 + "score":0.1478558086 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.386478574 + "score":0.1405067201 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.6064526195 + "score":0.2106207596 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.1902174551 + "score":0.0416456555 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.4077331961 + "score":0.0990623031 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.2683427219 + "score":0.0795987945 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.4680683679 + "score":0.1674316707 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.3074724881 + "score":0.0674985081 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.525017962 + "score":0.1154928046 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ko", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.1 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2928122517 + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ko", "task":"translation_from", - "metric":"chrf", - "score":0.5280367109 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ko", - "task":"translation_to", "metric":"bleu", - "score":0.2635968583 + "score":0.0672103499 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3368468321 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.1231771445 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.3217080826 + "score":0.1538606955 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"lua", + "model":"google\/gemini-2.5-pro", + "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.0773328896 + "score":0.0731686523 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"lua", + "model":"google\/gemini-2.5-pro", + "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.1832969439 + "score":0.0985812466 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.3697289109 + "score":0.2208894183 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.6084180116 + "score":0.3047935907 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.2613138272 + "score":0.006336512 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.5073964951 + "score":0.0186059462 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.3625690158 + "score":0.094505426 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.6006670528 + "score":0.2017400541 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.1897823417 + "score":0.0141271464 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.4297277988 + "score":0.0334534153 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"mg", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.3313910493 + "score":0.0867381827 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.5527395882 + "score":0.1588310511 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.1964447176 + "score":0.0055901791 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.5296015703 + "score":0.0899799957 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.3414148257 + "score":0.1294816588 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.5540910618 + "score":0.2140376737 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.3057528343 + "score":0.1096418767 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.5632066271 + "score":0.1484602611 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mr", - "task":"classification", + "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"mr", + "task":"classification", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.3287215168 + "score":0.2123460731 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5519425018 + "score":0.355254469 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2304519892 + "score":0.0244791954 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4989319097 + "score":0.137921948 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.3465169139 + "score":0.1020473557 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.5808747364 + "score":0.197831409 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.4158487664 + "score":0.1388655603 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.6600695767 + "score":0.3318625881 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.2409863105 + "score":0.1641407036 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.5202354498 + "score":0.266680691 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.2411557102 + "score":0.0091866723 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.5039906236 + "score":0.0286785733 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ne", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.3815914252 + "score":0.1394131915 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.6033151685 + "score":0.2765922512 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.2704724871 + "score":0.0483965296 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.5377831174 + "score":0.1060232209 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.2 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"nl", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.2902133526 + "score":0.10034493 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.5026903639 + "score":0.1553807871 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.3708860145 + "score":0.2797145372 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.6406384923 + "score":0.4388238124 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ny", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.1681187244 + "score":0.034290559 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.4132334642 + "score":0.0746115811 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.247731261 + "score":0.0446825714 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.551241939 + "score":0.2751100361 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.077753472 + "score":0.035737951 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.2855262128 + "score":0.1320906069 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0121360259 + "score":0.0471165976 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.2491350822 + "score":0.1649158656 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.3515785998 + "score":0.1717883762 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.5611231864 + "score":0.2829924006 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.243065872 + "score":0.0473366133 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.4461879463 + "score":0.0472659921 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4636008739 + "score":0.3123594633 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6571795999 + "score":0.4403827935 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4815076037 + "score":0.1719687661 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.6245096256 + "score":0.1882752522 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"pl", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.2488462209 + "score":0.0749300244 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.4964581993 + "score":0.1200920019 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.3733344795 + "score":0.1830386377 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.5954877296 + "score":0.2502216632 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.2 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3330604616 + "score":0.1361030105 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5424702945 + "score":0.2437066717 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.5373180666 + "score":0.1732366992 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.7255352889 + "score":0.2953051043 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"qu", - "task":"mgsm", + "model":"google\/gemini-2.5-pro", + "bcp_47":"ro", + "task":"arc", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ro", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.3593667369 + "score":0.1191095692 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.591533276 + "score":0.2528032318 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.534185929 + "score":0.1851411977 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.6955454677 + "score":0.2745749365 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2611217724 + "score":0.0328251853 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5239835944 + "score":0.0948529778 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.505609426 + "score":0.2486142719 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.6621791743 + "score":0.3250953964 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"rw", - "task":"classification", + "model":"google\/gemini-2.5-pro", + "bcp_47":"sd", + "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"rw", - "task":"mgsm", + "model":"google\/gemini-2.5-pro", + "bcp_47":"sd", + "task":"classification", "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.2495091982 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.5003958033 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.3005818546 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5528069872 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sd", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sd", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.3232854235 + "score":0.1260552814 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.5338248863 + "score":0.167920313 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.3633825131 + "score":0.1569836743 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.518082708 + "score":0.2025766659 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"si", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.2404030471 + "score":0.0375021678 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.4450495787 + "score":0.0518133834 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.2633769041 + "score":0.045375844 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.4000779424 + "score":0.0834549749 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.205986784 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.4106144312 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.1497281223 + "score":0.1243674491 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.4632023951 + "score":0.2280537353 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"so", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.1962561381 + "score":0.0291939407 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.3988286397 + "score":0.1082649083 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.1950290677 + "score":0.0370853459 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.471168865 + "score":0.1174899825 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.1 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"sr", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.3514693093 + "score":0.1757682146 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.6142503501 + "score":0.3008766306 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.4078134146 + "score":0.2082330564 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.5830477251 + "score":0.2618628182 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.302260643 + "score":0.1395902324 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.4726767086 + "score":0.2174681725 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.2173421081 + "score":0.119440034 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.5076467392 + "score":0.2479022501 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.1 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.3465319309 + "score":0.1807911166 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5749229816 + "score":0.3244473544 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.427946599 + "score":0.1337368217 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6561517251 + "score":0.2173463535 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"sw", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.3644616187 + "score":0.0167265048 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.5756919768 + "score":0.0601986184 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.4348964758 + "score":0.2110013881 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.6695951707 + "score":0.3419482007 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.2833908051 + "score":0.1301054745 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.5256006446 + "score":0.2254627309 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.295190519 + "score":0.1643043557 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.5552708257 + "score":0.2361420263 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.420625812 + "score":0.1225039269 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.6022558329 + "score":0.198958675 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.402616087 + "score":0.0931461339 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.593002412 + "score":0.1419037126 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.1 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.2920493848 + "score":0.0329651295 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.5466007251 + "score":0.0924755182 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.3859165101 + "score":0.0726933467 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.5519266683 + "score":0.0966899881 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ti", - "task":"classification", + "model":"google\/gemini-2.5-pro", + "bcp_47":"tr", + "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ti", - "task":"mgsm", + "model":"google\/gemini-2.5-pro", + "bcp_47":"tr", + "task":"classification", "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.2361795584 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.423037727 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.1171487104 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.2151258274 + "score":0.1 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"tr", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.2 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"tr", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.3232096665 + "score":0.0627836379 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.565658608 + "score":0.1537747644 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.3961318349 + "score":0.2746649389 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.6418401217 + "score":0.4031644529 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"uk", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.3098191173 + "score":0.1060274283 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.5405005226 + "score":0.1886969167 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.4237145042 + "score":0.044287435 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.5745094679 + "score":0.1034692205 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.3505799794 + "score":0.2528660307 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5928594723 + "score":0.319204417 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.3049544641 + "score":0.1044041173 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4891357222 + "score":0.166665052 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.3150100377 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.5577978212 + "score":0.0152843146 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.3047703558 + "score":0.1294358837 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.5585171573 + "score":0.2479602917 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"vi", - "task":"classification", + "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"vi", - "task":"mgsm", + "task":"classification", "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.3426773013 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5563179871 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.435916799 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6514701627 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"wo", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"wo", - "task":"mgsm", + "model":"google\/gemini-2.5-pro", + "bcp_47":"vi", + "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"wo", + "model":"google\/gemini-2.5-pro", + "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.1075258999 + "score":0.2169787191 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"wo", + "model":"google\/gemini-2.5-pro", + "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.3177870837 + "score":0.3030152567 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"wo", + "model":"google\/gemini-2.5-pro", + "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.0558894656 + "score":0.2322985671 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"wo", + "model":"google\/gemini-2.5-pro", + "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.1904131636 + "score":0.3328943549 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.2535066622 + "score":0.0600208887 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.4977368741 + "score":0.1457445652 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1296483018 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1831676522 + "score":0.0006881025 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"xh", - "task":"classification", + "model":"google\/gemini-2.5-pro", + "bcp_47":"yo", + "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"xh", - "task":"mgsm", + "model":"google\/gemini-2.5-pro", + "bcp_47":"yo", + "task":"classification", "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.260645909 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.4857479623 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0868490585 - }, - { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.4297420084 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"yo", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"yo", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.1383444474 + "score":0.0093430764 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.363924587 + "score":0.0639334201 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.1077842587 + "score":0.0290365467 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.2964363774 + "score":0.0799597164 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.2602982164 + "score":0.0058978605 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.5072059049 + "score":0.0483361134 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.2013964224 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2554887406 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", + "bcp_47":"zh", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.1308987845 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.1738613828 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.1669728523 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", + "score":0.1905927635 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"zu", + "task":"arc", + "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.2804680604 + "score":0.0414601372 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.5519773239 + "score":0.0662245232 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.250972384 + "score":0.0085456627 }, { - "model":"google\/gemini-2.5-flash-lite-preview-06-17", + "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.5466457559 + "score":0.0621750153 + }, + { + "model":"google\/gemini-2.5-pro", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.3519233265 + "score":0.2073802913 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.5745897235 + "score":0.4889223975 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.2759791384 + "score":0.0840656979 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.4854942304 + "score":0.3453561943 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", + "score":0.2 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", "score":0.4 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.2615212462 + "score":0.120094546 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.4896634905 + "score":0.3259782194 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.1667049515 + "score":0.0974181135 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.4295949613 + "score":0.3477814679 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"am", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.339375511 + "score":0.2393172056 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.566407043 + "score":0.4971254293 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.2692320655 + "score":0.2089212841 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.3685144367 + "score":0.3406916002 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.3872368514 + "score":0.2712045148 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.6253206413 + "score":0.5477096036 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.3385588073 + "score":0.178052271 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.5846751763 + "score":0.468064885 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ar", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ar", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.3462011916 + "score":0.2747843596 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5813984303 + "score":0.5519960681 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.4319399394 + "score":0.2863967069 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.6269389132 + "score":0.5318173199 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.2181205299 + "score":0.1096694862 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.4844955734 + "score":0.4291604898 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.2033199109 + "score":0.1630720543 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.4375696019 + "score":0.3952400339 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.2639599801 + "score":0.1892846534 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.4789807253 + "score":0.4212342522 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.2907084686 + "score":0.1938470016 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.4882539682 + "score":0.4527968539 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"as", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"as", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.3118761994 + "score":0.2094379574 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.5439623542 + "score":0.4509809217 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.243905252 + "score":0.1931386564 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.4730608179 + "score":0.4233010233 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"awa", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"awa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.40510024 + "score":0.2957522582 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.6210515689 + "score":0.5232039352 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.2510210672 + "score":0.2308361669 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.4814090379 + "score":0.4087255612 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.3101513593 + "score":0.200456445 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.5253923906 + "score":0.4226152307 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.216177525 + "score":0.1414132922 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.4555999734 + "score":0.4170843853 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"be", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.2142226733 + "score":0.18522743 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.5048792464 + "score":0.4467570037 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.3216771298 + "score":0.2590661095 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.5056511666 + "score":0.4657468506 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.3246124272 + "score":0.2663307677 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.5547058357 + "score":0.519985227 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.2300858071 + "score":0.1913577407 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.4391471287 + "score":0.4064669591 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bn", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"bn", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.3332105938 + "score":0.271237739 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5788748413 + "score":0.5173954387 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3925309816 + "score":0.3067537945 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5671456285 + "score":0.5194482945 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.4667759574 + "score":0.3694979709 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.6329355142 + "score":0.59081536 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.4086791428 + "score":0.374702944 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.6523845508 + "score":0.6019503341 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.3593703137 + "score":0.2792699678 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.6131084436 + "score":0.5157552806 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.349617489 + "score":0.2334415639 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.5888504157 + "score":0.5128705295 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.4190817149 + "score":0.3536861453 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.633614654 + "score":0.6024608455 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.3332466044 + "score":0.4031829559 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.5272130276 + "score":0.6234553711 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.4080538745 + "score":0.3189602129 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.6278026445 + "score":0.5548503533 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4940498755 + "score":0.5229096392 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6915113583 + "score":0.7023434262 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.3680077775 + "score":0.3137252517 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.5798463116 + "score":0.5147981205 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.4003636095 + "score":0.3302929673 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.5747287276 + "score":0.505425141 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"en", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.5780592594 + "score":0.3889146477 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.727062887 + "score":0.628092835 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.725042796 + "score":0.4660772497 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.868453404 + "score":0.7280386297 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"es", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3228577771 + "score":0.3593767686 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5691176688 + "score":0.5668073679 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.4211069488 + "score":0.3662275621 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6329026504 + "score":0.6102640711 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fa", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fa", - "task":"mgsm", + "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"fa", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.3659700454 + "score":0.286051969 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.5807664259 + "score":0.5373856549 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.2252471667 + "score":0.1816947237 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.4480100347 + "score":0.3981159206 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"fil", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.447799796 + "score":0.3579818144 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.6437494805 + "score":0.5889481625 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.318387966 + "score":0.3403832088 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.6173467767 + "score":0.590264879 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.3683003934 + "score":0.3180384008 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5961800274 + "score":0.5571267732 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5899042948 + "score":0.5778354146 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7506595836 + "score":0.7490356238 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0858271085 + "score":0.0380719948 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.2495672773 + "score":0.2066039108 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0320352537 + "score":0.0259757351 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.2006317654 + "score":0.147148937 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.3793081536 + "score":0.34811918 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.5865031052 + "score":0.5482709715 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.2344717249 + "score":0.1618983325 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4873543825 + "score":0.4411905252 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ha", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ha", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.3157236901 + "score":0.1925315551 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.5365403784 + "score":0.4032389241 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.3221592053 + "score":0.1992720083 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.565816945 + "score":0.4905441802 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"hi", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.4661333335 + "score":0.3722751955 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6599650679 + "score":0.6207213131 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.4319599559 + "score":0.3378499277 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6493733048 + "score":0.554090013 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hne", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.3345560794 + "score":0.2879989689 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.5782385873 + "score":0.5083598943 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.1623642047 + "score":0.1719225434 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.4427298342 + "score":0.3992950999 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hu", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"hu", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.3210417538 + "score":0.3234067809 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.5852315407 + "score":0.5706707095 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.4603569586 + "score":0.3187264685 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.6610825918 + "score":0.5856828402 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.3507436052 + "score":0.3583744222 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5856872774 + "score":0.5732194975 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.387557065 + "score":0.3364664006 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6730969345 + "score":0.6438910651 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ig", + "task":"mmlu", + "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.283991055 + "score":0.225000401 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.5165825783 + "score":0.4567066441 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.297494689 + "score":0.1941055199 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.5366364022 + "score":0.4504811493 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.3679312463 + "score":0.1944477164 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.5717815434 + "score":0.4517028309 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.2293183432 + "score":0.2035517344 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.5172904845 + "score":0.489419705 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"it", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.3498261827 + "score":0.3074361781 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.5707332142 + "score":0.5178180754 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.35475083 + "score":0.3505959215 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5942822113 + "score":0.5955060476 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ja", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ja", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.3590987818 + "score":0.2581140706 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5958999814 + "score":0.5395853617 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.4151803591 + "score":0.3298839393 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.5319028037 + "score":0.4471547552 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"jv", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3793672083 + "score":0.3177915441 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.6054760312 + "score":0.5387853038 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.3161702144 + "score":0.2549228547 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5956895972 + "score":0.5322440265 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.2948023962 + "score":0.1983700044 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.5499737045 + "score":0.4843458319 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.3612978689 + "score":0.2920207746 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.6021186725 + "score":0.5345155349 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"km", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"km", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.3644204358 + "score":0.3702042307 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.5994539632 + "score":0.5776853975 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.2065208646 + "score":0.1498433716 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.4474063783 + "score":0.3652702605 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.3051972526 + "score":0.2858443353 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.5542686332 + "score":0.5499221943 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.3567486129 + "score":0.2481102245 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.5611431734 + "score":0.4829685786 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ko", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.2872143997 + "score":0.2305492704 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.5525223441 + "score":0.4826740501 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.3116476133 + "score":0.1746024172 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.3648802199 + "score":0.3073554703 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.1399233592 + "score":0.0913600379 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.315099285 + "score":0.3305636235 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0802015694 + "score":0.0269728382 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.3631437635 + "score":0.3155017027 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.4128862434 + "score":0.3751831337 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.6274466843 + "score":0.6085851316 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.3688684574 + "score":0.259988405 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.5833102178 + "score":0.5046714005 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.3890840766 + "score":0.306099972 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.6240476873 + "score":0.5370842801 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.2256501556 + "score":0.1736022871 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.4794729055 + "score":0.4305653856 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mg", + "task":"mmlu", + "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.3701671266 + "score":0.2826629018 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.5789314386 + "score":0.5215979873 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.2339248832 + "score":0.238462643 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.5444701317 + "score":0.5334745774 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.4065449181 + "score":0.3161992509 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.6069224309 + "score":0.5479755911 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.3084108149 + "score":0.2133071404 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.5610104654 + "score":0.4660281027 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2888720948 + "score":0.2370074805 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5411662197 + "score":0.4804215458 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2820351359 + "score":0.2399769139 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.5017455952 + "score":0.4726429935 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.4137133794 + "score":0.3104483533 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.6312868188 + "score":0.5705763492 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.4812654706 + "score":0.4229626959 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.7314221816 + "score":0.6856510383 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.2 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.3696810103 + "score":0.2227645269 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.5941720939 + "score":0.4888582617 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.3374140159 + "score":0.1869632744 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.5598126987 + "score":0.4322398057 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ne", + "task":"mmlu", + "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.3690975587 + "score":0.3115387303 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.591186642 + "score":0.5342290246 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.2727284255 + "score":0.22081567 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.4816401296 + "score":0.4878836055 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"nl", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.3099744217 + "score":0.3125704924 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.5540665201 + "score":0.5397676594 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.358350435 + "score":0.3586968371 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.6375438913 + "score":0.6075205554 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ny", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.2769742108 + "score":0.1901221224 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.5127880962 + "score":0.438728736 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.1953053994 + "score":0.1330024304 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.5454095128 + "score":0.4711022084 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.1552982789 + "score":0.0258426139 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.4088650794 + "score":0.2237241232 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0578542594 + "score":0.0460531144 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.4227186624 + "score":0.3418147419 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.3491547876 + "score":0.3714452662 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.6003545703 + "score":0.5977153904 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.307748028 + "score":0.2234825764 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.5199829894 + "score":0.4562477173 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4833653569 + "score":0.4180718844 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6877252787 + "score":0.6426219278 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4959043786 + "score":0.3938693136 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.631939007 + "score":0.5573992167 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"pl", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.345076722 + "score":0.2456102401 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.5982949848 + "score":0.5190609119 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.3888863333 + "score":0.4265619216 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.6077722316 + "score":0.6320824157 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.4185968559 + "score":0.3537745123 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.6194298291 + "score":0.5904429929 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.5066913819 + "score":0.4588664196 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.7045563325 + "score":0.6844540285 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.4 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ro", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.3510540475 + "score":0.3139442337 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.5985634799 + "score":0.5741447282 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.5670740367 + "score":0.5016049999 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.7258358589 + "score":0.6788048008 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2835606416 + "score":0.2449777422 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5525332546 + "score":0.5268764903 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.5294387768 + "score":0.3789708434 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.6814959444 + "score":0.5790333031 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.315385923 + "score":0.1839360587 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.5485214208 + "score":0.448997409 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.3039027455 + "score":0.1891835724 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.5685019601 + "score":0.508623725 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.4019575944 + "score":0.3352727297 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.6276858351 + "score":0.5583215205 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.3830695907 + "score":0.2009000601 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.5452929704 + "score":0.4004383195 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"si", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.3256641934 + "score":0.2297304995 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.5531574071 + "score":0.5040607132 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.3465036043 + "score":0.2136543311 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.5339014715 + "score":0.3916393466 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sn", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"sn", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.2148918029 + "score":0.1221415503 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.4431856714 + "score":0.3516954503 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.2048159746 + "score":0.108688779 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.5229910344 + "score":0.4515663403 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"so", + "task":"mmlu", + "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.3102660748 + "score":0.2308889646 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.5202647222 + "score":0.4618048204 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.2456069778 + "score":0.1667302795 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.503324615 + "score":0.4717296026 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"sr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.3298581842 + "score":0.2351861569 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.5870517038 + "score":0.5329036218 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.4507987284 + "score":0.4168384094 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.6306764686 + "score":0.6032787874 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"su", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"su", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.3438400948 + "score":0.2291561983 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.5431534783 + "score":0.4673987803 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.2185629205 + "score":0.2036733766 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.513335728 + "score":0.5047620958 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.3332298772 + "score":0.3500384253 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5774389783 + "score":0.5797456052 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.4778984127 + "score":0.4268868445 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6958168144 + "score":0.664863412 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"sw", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.4452506057 + "score":0.3005035588 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.6299322033 + "score":0.5210660172 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.4773237879 + "score":0.3401968092 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.6983325706 + "score":0.6217197146 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ta", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ta", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.2765535661 + "score":0.2662307086 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.5553462948 + "score":0.5053585639 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.3584530222 + "score":0.3044345778 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.6096517359 + "score":0.5714036731 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.4838983303 + "score":0.3330093484 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.6878651593 + "score":0.5694168709 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.4225689448 + "score":0.257812168 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.6171822766 + "score":0.5167002436 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"tg", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.2905859261 + "score":0.2176170344 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.5219303661 + "score":0.4496406258 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.3215838806 + "score":0.2605536967 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.5332721729 + "score":0.4816066849 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"th", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"th", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.363389223 + "score":0.2992360169 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.5927603201 + "score":0.5585599708 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.4280908922 + "score":0.4269079012 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.5695430758 + "score":0.5719718715 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.2870643832 + "score":0.2042995208 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.5217258722 + "score":0.416626147 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.1683718664 + "score":0.0833250166 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.2689944739 + "score":0.1982489294 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"tr", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"tr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"tr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.3532965173 + "score":0.2832304201 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.6170908018 + "score":0.5467240003 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.4401941591 + "score":0.3273464288 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.6752054069 + "score":0.5827048506 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"uk", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.3183568029 + "score":0.2813742416 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.569783479 + "score":0.5413704266 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.4278428166 + "score":0.4063054094 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.6040719448 + "score":0.6020718231 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"umb", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.037144113 + "score":0.0495917134 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.089331464 + "score":0.1711087397 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0447156552 + "score":0.0358872001 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.2228994735 + "score":0.2761667256 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.3194163699 + "score":0.322448107 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5758075077 + "score":0.5887654616 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2609063103 + "score":0.2148139783 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4351160146 + "score":0.4226865444 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.3488574237 + "score":0.2829644119 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.5833892626 + "score":0.5194956482 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.3525318348 + "score":0.2911955464 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.600901419 + "score":0.5560139888 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"vi", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.4227968054 + "score":0.2745000434 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.6326522416 + "score":0.5206422805 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.4693432911 + "score":0.4261790941 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.6659480306 + "score":0.6358462464 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"wo", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.1552091518 + "score":0.0696458062 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.3984533183 + "score":0.2461140434 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0934433089 + "score":0.0728989985 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.3362523248 + "score":0.2267265908 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.3241760602 + "score":0.2072386748 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.5694459727 + "score":0.4628288648 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1373684835 + "score":0.1471425714 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1808455778 + "score":0.1971299212 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.2594184267 + "score":0.253783308 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.4970898641 + "score":0.4487387303 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.1342973002 + "score":0.0662544821 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.4760810578 + "score":0.3784904721 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"yo", + "task":"arc", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"yo", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.1670055121 + "score":0.0887390501 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.3949060751 + "score":0.3201148841 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.1271782927 + "score":0.1178050815 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.2784136749 + "score":0.307433063 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.2666023686 + "score":0.2253512269 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.5200218275 + "score":0.4949150094 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1881613886 + "score":0.1897306863 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2877597368 + "score":0.2561574259 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"zh", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2632355471 + "score":0.2201641871 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5527183044 + "score":0.5051068628 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.3128970722 + "score":0.2508351517 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3699956681 + "score":0.3110461024 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", + "bcp_47":"zu", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.3295494586 + "score":0.252616884 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.557523684 + "score":0.4822778382 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.3146714644 + "score":0.1940901676 }, { - "model":"google\/gemini-2.5-flash-preview", + "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.5811521396 + "score":0.4661416094 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.342116281 + "score":0.2041309024 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.559017125 + "score":0.4630820951 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.2612038772 + "score":0.1453469275 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.4610162591 + "score":0.3874336138 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.2 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.2362598693 + "score":0.1059711376 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.4567970323 + "score":0.300568481 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.1615485704 + "score":0.0110669593 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.4248295507 + "score":0.2141540563 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.2883756135 + "score":0.1906134629 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.5452929372 + "score":0.4467868389 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.2563045907 + "score":0.2001643223 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.3587997566 + "score":0.3225170104 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.3529391424 + "score":0.1876459632 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.6061230642 + "score":0.4830875841 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.3735935027 + "score":0.1830944017 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.6118752881 + "score":0.4175337587 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.3509446122 + "score":0.260683336 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5872861306 + "score":0.5383651277 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.4369226272 + "score":0.2962406565 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.6357392299 + "score":0.5190026627 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.178647434 + "score":0.1438491224 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.459184816 + "score":0.418499848 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1766325657 + "score":0.1044262978 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.42829263 + "score":0.3577242047 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.2689181562 + "score":0.1675595946 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.5103302194 + "score":0.4187188467 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.2811045733 + "score":0.1545869288 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.4796679654 + "score":0.4031218248 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.302725237 + "score":0.1892328534 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.542445303 + "score":0.4434206925 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.2074435657 + "score":0.1070430926 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.4358785934 + "score":0.3318636339 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"awa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.3306584572 + "score":0.3180630074 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.5470737398 + "score":0.5401606876 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.1839015438 + "score":0.1993490206 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.3995221223 + "score":0.4215901923 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.2402619776 + "score":0.1604267099 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.4816842061 + "score":0.4139767864 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.2017479595 + "score":0.1651025864 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.4438028104 + "score":0.3978212407 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.1964434077 + "score":0.19213953 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.4658109118 + "score":0.4512512424 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.3578761246 + "score":0.2406657525 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.5250650323 + "score":0.4536513075 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.3118354834 + "score":0.2519150677 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.5301057957 + "score":0.4966963131 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.2712094702 + "score":0.2057435019 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.4554148161 + "score":0.4016427491 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"bn", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2928560114 + "score":0.2183929994 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5373877759 + "score":0.4877941086 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.371713483 + "score":0.2700916391 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5467648432 + "score":0.5150577414 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.4167458111 + "score":0.2900668497 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.6414773714 + "score":0.5379961095 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.4574369641 + "score":0.3193377157 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.6642298649 + "score":0.5978978692 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.3171272191 + "score":0.2457083208 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.5378911972 + "score":0.4937183307 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.3576320675 + "score":0.1424911854 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.5996552124 + "score":0.3546559531 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.4 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.3763167038 + "score":0.2481120403 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.6069984198 + "score":0.5339550423 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.412011399 + "score":0.3131426524 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.5948875971 + "score":0.5548197404 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3827187056 + "score":0.3440655166 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.6092943987 + "score":0.563902418 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.5181328066 + "score":0.4305522274 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6981190119 + "score":0.6477508732 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"el", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.3297475202 + "score":0.2080428665 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.5587828835 + "score":0.4376921278 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.3865296224 + "score":0.3106300811 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.5630460332 + "score":0.4971105137 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"en", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.5942868447 + "score":0.4113463435 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.7298379077 + "score":0.6260248317 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.7309888475 + "score":0.5310035709 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.8534239972 + "score":0.7595845064 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"es", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3497891701 + "score":0.3031284355 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5889817614 + "score":0.5241309352 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.395885417 + "score":0.3289699508 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6273835234 + "score":0.5811203167 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"fa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.3764428485 + "score":0.2483616515 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.5794911256 + "score":0.5338391625 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.3019679958 + "score":0.1651229998 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.5314471888 + "score":0.3923963113 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fil", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"fil", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.3970330872 + "score":0.3057177881 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.6224300541 + "score":0.5523945263 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.3731416111 + "score":0.2711892461 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.6421804886 + "score":0.5802332073 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.3737227872 + "score":0.3349110908 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.6039484183 + "score":0.564806297 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5555743798 + "score":0.4574014191 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7229519037 + "score":0.6626552528 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0654195918 + "score":0.0488154154 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.2431272498 + "score":0.1944904286 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0469027058 + "score":0.0115014356 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.2599881115 + "score":0.1082073343 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.344471287 + "score":0.2954497906 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.5591834446 + "score":0.5086877895 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.180119873 + "score":0.1582270271 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4654772276 + "score":0.4201411039 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ha", + "task":"arc", + "metric":"accuracy", + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ha", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.2852268084 + "score":0.1697291765 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.4820918601 + "score":0.4073157654 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.318588004 + "score":0.139672818 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.5675346049 + "score":0.4450194819 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"hi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.4387215588 + "score":0.3184042229 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6443095169 + "score":0.5712698408 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.4255607348 + "score":0.3659029431 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6457270128 + "score":0.5796195236 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.336133928 + "score":0.2791185419 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.545638091 + "score":0.5226615992 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.1683321583 + "score":0.145820804 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.4261207547 + "score":0.3820953887 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"hu", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.3151646581 + "score":0.2313008892 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.5599039863 + "score":0.4915348458 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.4642022823 + "score":0.2765024802 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.6720003623 + "score":0.5261755337 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"id", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.3789722313 + "score":0.2301748885 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5973074889 + "score":0.4682741896 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.406320567 + "score":0.3346592082 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6828299565 + "score":0.6514874668 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ig", + "task":"mmlu", + "metric":"accuracy", "score":0.4 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.2779082008 + "score":0.154261694 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.5108078595 + "score":0.3957095627 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.2835120188 + "score":0.1177946719 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.5067796001 + "score":0.341868335 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.2679844764 + "score":0.2239397579 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.5037933164 + "score":0.4512212104 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.243508886 + "score":0.135748348 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.5329216971 + "score":0.4408716957 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", - "task":"mgsm", + "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"it", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"it", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.3177725576 + "score":0.2724260509 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.5648818053 + "score":0.5200202435 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.3654943432 + "score":0.2753225284 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5949572053 + "score":0.5457466615 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ja", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.3423388267 + "score":0.2664966821 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5884893314 + "score":0.5386982677 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.3740472584 + "score":0.2651365589 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4832664839 + "score":0.409095006 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3540174328 + "score":0.2326358655 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.5513315973 + "score":0.4815897231 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.329885986 + "score":0.2187928356 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5805485408 + "score":0.4896578943 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.2553025069 + "score":0.1666068635 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.5408614418 + "score":0.4554883841 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.3062994849 + "score":0.1547742726 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.58504635 + "score":0.4534139462 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"km", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.4 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.3700483899 + "score":0.2647824193 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.6006134318 + "score":0.5269086196 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.2315452529 + "score":0.1635334444 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.4617416997 + "score":0.3431273828 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"kn", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.2512885128 + "score":0.247746183 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.4934774392 + "score":0.506339637 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.3278617048 + "score":0.1775009719 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.5102619479 + "score":0.4300321597 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ko", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.2465751079 + "score":0.2402657185 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.515705028 + "score":0.497198112 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.2528779436 + "score":0.3100527074 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.3488788279 + "score":0.3696197774 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.0591613845 + "score":0.0872330227 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.2662007935 + "score":0.3059813913 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0779434955 + "score":0.0010116202 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.3095367271 + "score":0.1893341465 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.3908377774 + "score":0.3447519877 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.6260985434 + "score":0.578789784 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.3130016025 + "score":0.2508560655 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.5391710538 + "score":0.4987822313 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.3471192 + "score":0.2732982319 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.5771683143 + "score":0.5233285219 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.2207082106 + "score":0.137657899 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.4870892013 + "score":0.3935929024 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"mg", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.4 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"mg", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.2993063676 + "score":0.2082275626 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.5474937127 + "score":0.4639776287 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.2153101678 + "score":0.1350252624 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.5379094165 + "score":0.4834543859 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.3532448793 + "score":0.2600500491 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.5657281022 + "score":0.5029669853 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.3046979511 + "score":0.2091322046 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.5523287159 + "score":0.4711774201 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", - "task":"mgsm", + "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.3055522766 + "score":0.2256246926 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5435968357 + "score":0.4703189943 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.257460039 + "score":0.2254492518 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4845716024 + "score":0.447826525 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.3746934831 + "score":0.2986303081 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.6046609636 + "score":0.5546917725 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.4734156929 + "score":0.3680194341 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.7284733826 + "score":0.6778287705 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.3 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.3552781219 + "score":0.1979480779 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.5977013775 + "score":0.4791457508 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.3211140622 + "score":0.1536786708 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.544805929 + "score":0.4315811907 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ne", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.3774439938 + "score":0.2955515679 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.599804205 + "score":0.5399574649 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.2861323795 + "score":0.2350766648 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.5180107937 + "score":0.4890671168 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"nl", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"nl", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.312116976 + "score":0.2583853642 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.5463170004 + "score":0.5143387984 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.3780530389 + "score":0.3585971813 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.6232733213 + "score":0.6255063069 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.2 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ny", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.2177968416 + "score":0.1562574059 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.4415294523 + "score":0.378833839 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.1367315108 + "score":0.1000795039 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.4585024296 + "score":0.447037349 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.2 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.1871006972 + "score":0.0981161875 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.4365658925 + "score":0.3370208163 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.106229994 + "score":0.040931235 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.4277182017 + "score":0.3615428475 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.3740486194 + "score":0.2626677598 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.6141131486 + "score":0.4970567085 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.31231668 + "score":0.1807466012 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.5222572922 + "score":0.4219189716 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4182659606 + "score":0.3706063992 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6696655487 + "score":0.6167676482 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4665320226 + "score":0.4234596823 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.6280330147 + "score":0.5629443923 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"pl", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"pl", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.3282584844 + "score":0.2754265608 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.5713765921 + "score":0.5207065369 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.3762753954 + "score":0.3700040895 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.5940500139 + "score":0.5924241261 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3485890022 + "score":0.2971403532 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.574915747 + "score":0.5321068893 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4760104707 + "score":0.4364286549 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6891420805 + "score":0.6509885745 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.3 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ro", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.3285564053 + "score":0.27702997 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.5835489949 + "score":0.5437386483 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.5863602394 + "score":0.4580925611 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.7345305045 + "score":0.6514836722 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2893557086 + "score":0.2300270544 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5514512546 + "score":0.4839384065 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.4910352633 + "score":0.4236492288 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.6636379715 + "score":0.6116207052 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.2768738298 + "score":0.1602143293 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.5136190092 + "score":0.3793757948 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.2743310586 + "score":0.1370228414 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.5596031593 + "score":0.4797772284 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.3296224998 + "score":0.2571204202 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.5499189472 + "score":0.4946608155 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.2231786181 + "score":0.0869374651 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.407169401 + "score":0.3119061498 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", + "score":0.2 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"si", + "task":"mmlu", + "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.2632131459 + "score":0.1981443603 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.4981476408 + "score":0.4536105905 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.3150668549 + "score":0.2278086127 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.466344362 + "score":0.4013315084 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.1 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.1425864886 + "score":0.0939343156 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.3921687091 + "score":0.3129229613 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.1876717865 + "score":0.074740365 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.4923376927 + "score":0.4045773842 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.3 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"so", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.2921978364 + "score":0.2273071628 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.5121729513 + "score":0.4497946959 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.2437506181 + "score":0.1824497409 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.511793128 + "score":0.4972329945 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sr", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.3070769379 + "score":0.2541965029 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.5829431146 + "score":0.5234491687 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.4562210568 + "score":0.3308712415 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.6349957477 + "score":0.5512495988 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.2923338131 + "score":0.1522391036 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.4917795718 + "score":0.3508292995 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.2448808161 + "score":0.1514782919 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.5213243396 + "score":0.4909144205 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.340913979 + "score":0.2959760233 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5878242881 + "score":0.5398896148 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.4174508433 + "score":0.4357891553 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6558948574 + "score":0.6529723913 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sw", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.397583037 + "score":0.2501435914 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.6014699575 + "score":0.5088299265 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.4540021563 + "score":0.262372343 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.7002830052 + "score":0.5806899403 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.2981018347 + "score":0.2450100573 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.5573394656 + "score":0.4918691312 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.3033361537 + "score":0.2434733519 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.5406646936 + "score":0.5120095348 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"te", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.4025505277 + "score":0.3568851036 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.6339036498 + "score":0.5825326367 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.4166086163 + "score":0.3112091725 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.6171953291 + "score":0.5431414206 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.2859260679 + "score":0.1741933649 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.5305344486 + "score":0.4272342177 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.3454858711 + "score":0.1811584685 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.5585923193 + "score":0.4001890626 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", - "task":"mgsm", + "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"th", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.3196721957 + "score":0.2544201673 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.5597673276 + "score":0.5081271409 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.4114706745 + "score":0.2942923294 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.560767027 + "score":0.4479604827 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.2476296934 + "score":0.0881111208 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.5033069835 + "score":0.3173214379 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.1179117378 + "score":0.0384668791 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.2587205011 + "score":0.1259439982 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"tr", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.3291780472 + "score":0.2563119866 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.5900439285 + "score":0.5291012922 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.4285247051 + "score":0.3009595898 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.6508035663 + "score":0.5854044281 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", - "task":"mgsm", + "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"uk", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.3407035036 + "score":0.2567288533 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.583433778 + "score":0.5177571061 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.4167194618 + "score":0.3201007033 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.6135073244 + "score":0.5323037228 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0287200326 + "score":0.0115980217 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.1566177223 + "score":0.115883071 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0503465615 + "score":0.0129780747 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.21805483 + "score":0.1450749981 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ur", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.8 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.3212777323 + "score":0.2402951661 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5735084601 + "score":0.5033005385 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.3027958989 + "score":0.2302239803 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4912965943 + "score":0.4066956434 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.2583582755 + "score":0.203750264 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.483302551 + "score":0.4979829233 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.3579243963 + "score":0.2162945849 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.6083657804 + "score":0.4941278712 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.3471784526 + "score":0.2537752957 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5719753053 + "score":0.5073147534 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.4601231177 + "score":0.3583753747 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.6644848789 + "score":0.6253917282 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0932068478 + "score":0.1081430594 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.3792197219 + "score":0.2665454299 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.1171143464 + "score":0.0142970887 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.3384795969 + "score":0.1489810124 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.2968992777 + "score":0.1700904158 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.5258850631 + "score":0.421111634 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1321430026 + "score":0.1213993524 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.176594989 + "score":0.1655788185 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.1979226992 + "score":0.1531171972 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.4588070152 + "score":0.3828830786 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.1181506898 + "score":0.0483942569 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.4718021868 + "score":0.3116951706 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"yo", + "task":"arc", + "metric":"accuracy", + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"yo", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.1110331374 + "score":0.0816098185 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.3622261661 + "score":0.2781732759 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.1764819134 + "score":0.041496472 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.3463753843 + "score":0.202397124 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.2756047962 + "score":0.1783312983 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.518253876 + "score":0.4423885999 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.2356751392 + "score":0.1632112014 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.3051882861 + "score":0.2297357227 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"zh", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2989639846 + "score":0.249810194 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5646599732 + "score":0.5151255506 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2915310914 + "score":0.1854861198 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3358004159 + "score":0.2463326959 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"zu", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.2 + }, + { + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.3429319501 + "score":0.1729786376 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.5895603725 + "score":0.4189697233 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.2713803282 + "score":0.0866404913 }, { - "model":"google\/gemini-2.5-flash-preview-05-20", + "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.5773152323 + "score":0.3943277627 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", + "model":"google\/gemini-flash-1.5-8b", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"aeb", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.2177971147 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.0119333611 + "score":0.4738076987 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.0433795513 + "score":0.1414064724 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.086624937 + "score":0.3965739567 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"af", "task":"translation_from", "metric":"bleu", - "score":0.1420800017 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"af", "task":"translation_from", "metric":"chrf", - "score":0.2571278005 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"af", "task":"translation_to", "metric":"bleu", - "score":0.0740143168 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"af", "task":"translation_to", "metric":"chrf", - "score":0.0900638149 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.075270263 + "score":0.076595229 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.1801898422 + "score":0.2493366365 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.079678832 + "score":0.0440715947 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.1387482052 + "score":0.2820233612 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.1913062339 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.4296053228 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.13443556 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.0 + "score":0.2528930204 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"am", + "task":"truthfulqa", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", - "task":"mgsm", + "model":"google\/gemma-3-27b-it", + "bcp_47":"apc", + "task":"classification", "metric":"accuracy", - "score":0.3 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.0322638642 + "score":0.24265587 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.1070675634 + "score":0.4918380331 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.1132855578 + "score":0.2104382871 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.2363989029 + "score":0.456050442 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.0628255969 + "score":0.2891206499 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.1404540616 + "score":0.5438550217 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.0929691836 + "score":0.3184721364 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.1936176153 + "score":0.5483731849 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.1363784433 + "score":0.1664804364 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.2546980579 + "score":0.4585261833 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.2831711013 + "score":0.1299183594 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.423402744 + "score":0.3752977557 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", + "model":"google\/gemma-3-27b-it", + "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", + "model":"google\/gemma-3-27b-it", + "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.1618648119 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", + "model":"google\/gemma-3-27b-it", + "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.4104839109 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", + "model":"google\/gemma-3-27b-it", + "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.046932578 + "score":0.2266738862 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", + "model":"google\/gemma-3-27b-it", + "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.1114891082 + "score":0.4315390742 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", + "model":"google\/gemma-3-27b-it", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", + "model":"google\/gemma-3-27b-it", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.0849232664 + "score":0.2602059805 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", + "model":"google\/gemma-3-27b-it", + "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.1180502872 + "score":0.4987515978 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", + "model":"google\/gemma-3-27b-it", + "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.16572501 + "score":0.0907943093 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", + "model":"google\/gemma-3-27b-it", + "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.2936653353 + "score":0.348768221 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.0353192374 + "score":0.3277667824 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.0912070168 + "score":0.5267403611 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.0843198982 + "score":0.2123273366 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.1834427215 + "score":0.408906638 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.1728301922 + "score":0.1880331404 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.2722873664 + "score":0.4234748209 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.0452386704 + "score":0.1517877566 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.0759200811 + "score":0.423956163 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"be", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.1519234884 + "score":0.148851004 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.2526899598 + "score":0.4514291775 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.0370612896 + "score":0.3105472783 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.1525405786 + "score":0.498684126 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.0673608539 + "score":0.2430984589 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.1795707438 + "score":0.4969060141 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.0936480171 + "score":0.1699224465 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.1378127408 + "score":0.3964402252 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bm", "task":"translation_from", "metric":"bleu", - "score":0.1331943797 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bm", "task":"translation_from", "metric":"chrf", - "score":0.2183301061 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bm", "task":"translation_to", "metric":"bleu", - "score":0.0187720962 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bm", "task":"translation_to", "metric":"chrf", - "score":0.0552092745 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.3020434614 + "score":0.2907230812 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.4007490688 + "score":0.5148223626 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.2320524279 + "score":0.3473636391 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.2712383449 + "score":0.5442574441 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ca", "task":"translation_from", "metric":"bleu", - "score":0.1557590383 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ca", "task":"translation_from", "metric":"chrf", - "score":0.2999541814 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ca", "task":"translation_to", "metric":"bleu", - "score":0.2122477941 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ca", "task":"translation_to", "metric":"chrf", - "score":0.2481311219 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ceb", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.0568779007 + "score":0.3650597419 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.1090963624 + "score":0.5512750223 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.2392496577 + "score":0.293824845 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.3103186792 + "score":0.5724817779 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ckb", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.0309583316 + "score":0.1983726871 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.0727861543 + "score":0.4779908235 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.2299189265 + "score":0.0792877335 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.3783014951 + "score":0.3908004248 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", + "model":"google\/gemma-3-27b-it", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"cs", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", + "model":"google\/gemma-3-27b-it", + "bcp_47":"cs", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.0594432616 + "score":0.2863884915 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", + "model":"google\/gemma-3-27b-it", + "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.1443026551 + "score":0.5641108436 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", + "model":"google\/gemma-3-27b-it", + "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.0648561428 + "score":0.352498756 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", + "model":"google\/gemma-3-27b-it", + "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.0778645979 + "score":0.5860513143 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", + "model":"google\/gemma-3-27b-it", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", + "model":"google\/gemma-3-27b-it", + "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", + "model":"google\/gemma-3-27b-it", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.2378006592 + "score":0.3577876868 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", + "model":"google\/gemma-3-27b-it", + "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.3506619317 + "score":0.5943423055 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", + "model":"google\/gemma-3-27b-it", + "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.0741460229 + "score":0.5344280565 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", + "model":"google\/gemma-3-27b-it", + "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.1278099202 + "score":0.7084649844 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"el", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"el", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.0175340474 + "score":0.2914236052 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.1117737118 + "score":0.5147962724 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.2807209617 + "score":0.3761179017 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.3753520077 + "score":0.5590147212 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", + "model":"google\/gemma-3-27b-it", + "bcp_47":"en", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", + "model":"google\/gemma-3-27b-it", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.0505391541 + "score":0.452427177 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", + "model":"google\/gemma-3-27b-it", + "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.08189026 + "score":0.6525566656 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", + "model":"google\/gemma-3-27b-it", + "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.0784600421 + "score":0.5467976399 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", + "model":"google\/gemma-3-27b-it", + "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.1237015541 + "score":0.7780833183 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"en", + "task":"truthfulqa", "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.1167387293 + "score":0.4 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.1450666727 + "model":"google\/gemma-3-27b-it", + "bcp_47":"es", + "task":"arc", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3227616178 + "model":"google\/gemma-3-27b-it", + "bcp_47":"es", + "task":"classification", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.4512518394 + "model":"google\/gemma-3-27b-it", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"wuu", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"es", + "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"wuu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.0673836275 + "score":0.3554331718 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"wuu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.12575806 + "score":0.5997743406 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"wuu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.0232292611 + "score":0.3847830842 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"wuu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.0267312591 + "score":0.6191109047 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"fa", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"fa", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.0228307046 + "score":0.2579194729 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.1147202951 + "score":0.5351839762 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.0526793492 + "score":0.1571676635 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.0504077394 + "score":0.4046770996 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fil", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", - "task":"mgsm", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fil", + "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.3771043132 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.5835797455 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.3564426025 }, { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.0 + "score":0.6107274367 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"fr", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.0474100022 + "score":0.2846092378 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.1123198175 + "score":0.5655970541 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.0125352085 + "score":0.5192984544 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.0283735992 + "score":0.7020040834 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.1714170596 + "score":0.0220051815 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.322217716 + "score":0.1861453784 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0919538797 + "score":0.0227307294 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.1130426788 + "score":0.1850492522 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.0507278464 + "score":0.2796934014 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.1374307154 + "score":0.5155626456 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.089396706 + "score":0.1979202011 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.164648643 + "score":0.4528880823 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ha", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ha", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.1595296755 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.3859356797 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.171830216 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.0 + "score":0.4608354018 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"hi", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", "score":0.4 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.0644544724 + "score":0.3520691191 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.1431172464 + "score":0.6035990708 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.1618106339 + "score":0.3987037224 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.2951594047 + "score":0.6195037668 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hne", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.1227062573 + "score":0.2709410734 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.1954611936 + "score":0.4976144005 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.0908738713 + "score":0.1150407607 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.1791636936 + "score":0.3709160058 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ht", "task":"translation_from", "metric":"bleu", - "score":0.115635659 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ht", "task":"translation_from", "metric":"chrf", - "score":0.21001665 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ht", "task":"translation_to", "metric":"bleu", - "score":0.1625509988 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"fr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ht", "task":"translation_to", "metric":"chrf", - "score":0.2704886762 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"hi", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"hu", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"hi", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hu", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.023697236 + "score":0.2903150375 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"hi", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.0393020248 + "score":0.5392715859 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"hi", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.3460432788 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"hi", + "model":"google\/gemma-3-27b-it", + "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.0 + "score":0.6009670508 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-pro-preview", + "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 }, { - "model":"google\/gemini-2.5-pro-preview", + "model":"google\/gemma-3-27b-it", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.3377417704 }, { - "model":"google\/gemini-2.5-pro-preview", + "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.5674360496 }, { - "model":"google\/gemini-2.5-pro-preview", + "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.0525040446 + "score":0.3534620252 }, { - "model":"google\/gemini-2.5-pro-preview", + "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.1506768629 + "score":0.6680177029 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ig", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.0597280929 + "score":0.1600009223 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.1348390312 + "score":0.3857586031 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.1112548291 + "score":0.1602266912 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.2280150235 + "score":0.4091024664 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ilo", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.5 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.0835367034 + "score":0.1546473042 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.1878815782 + "score":0.3985794204 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.0362939834 + "score":0.1752645287 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.0593832479 + "score":0.4668449261 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"jv", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"it", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.0595037472 - }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.1440055677 + "score":0.6 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.0 + "model":"google\/gemma-3-27b-it", + "bcp_47":"it", + "task":"classification", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.1216146696 + "model":"google\/gemma-3-27b-it", + "bcp_47":"it", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ko", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"it", + "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.0693648124 + "score":0.3356485456 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.1989936561 + "score":0.5684527887 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.0087393249 + "score":0.3627134123 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.0221958957 + "score":0.6050822949 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"mr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"mr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.1016207551 + "score":0.2481856237 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"mr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.1937396468 + "score":0.5180749152 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"mr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.0232755836 + "score":0.242508046 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"mr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.0683476995 + "score":0.4046420215 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"pa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"pa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3259062344 + "score":0.3019627022 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"pa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.4538874381 + "score":0.5133980923 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"pa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.1871393365 + "score":0.2316517545 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"pa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.2200892244 + "score":0.5189963647 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"pt", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"pt", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ki", "task":"translation_from", "metric":"bleu", - "score":0.1406086157 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"pt", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ki", "task":"translation_from", "metric":"chrf", - "score":0.2057127554 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"pt", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ki", "task":"translation_to", "metric":"bleu", - "score":0.2664628529 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"pt", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ki", "task":"translation_to", "metric":"chrf", - "score":0.3035324696 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ru", + "model":"google\/gemma-3-27b-it", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"kk", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ru", + "model":"google\/gemma-3-27b-it", + "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ru", + "model":"google\/gemma-3-27b-it", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.1104161719 + "score":0.2187004813 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ru", + "model":"google\/gemma-3-27b-it", + "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.2301836625 + "score":0.4910590831 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ru", + "model":"google\/gemma-3-27b-it", + "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.185463581 + "score":0.2108939118 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ru", + "model":"google\/gemma-3-27b-it", + "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.2585022568 + "score":0.4375825873 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"sw", + "model":"google\/gemma-3-27b-it", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"sw", + "model":"google\/gemma-3-27b-it", + "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"sw", + "model":"google\/gemma-3-27b-it", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.3274744668 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"sw", + "model":"google\/gemma-3-27b-it", + "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.0271861792 + "score":0.5605813039 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"sw", + "model":"google\/gemma-3-27b-it", + "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.1476512664 + "score":0.0800539722 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"sw", + "model":"google\/gemma-3-27b-it", + "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.3192760623 + "score":0.3336188156 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ta", + "model":"google\/gemma-3-27b-it", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ta", + "model":"google\/gemma-3-27b-it", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.2136207431 + "score":0.2692189197 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ta", + "model":"google\/gemma-3-27b-it", + "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.3120102842 + "score":0.5290912174 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ta", + "model":"google\/gemma-3-27b-it", + "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.114908602 + "score":0.2869741566 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ta", + "model":"google\/gemma-3-27b-it", + "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.1409558682 + "score":0.5072256514 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"te", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ko", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"te", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"te", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ko", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.1209511429 + "score":0.2172591082 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"te", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.1906473928 + "score":0.478962626 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"te", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.1356783563 + "score":0.2169046229 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"te", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.2003050615 + "score":0.3151387909 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"tr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"tr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.0658153186 + "score":0.0905061152 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"tr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.1043402283 + "score":0.321707617 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"tr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.2785590215 + "score":0.0361920973 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"tr", + "model":"google\/gemma-3-27b-it", + "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.3929421464 + "score":0.222315171 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ur", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ur", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.1341611323 + "score":0.3100950481 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ur", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.2099852043 + "score":0.558054933 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ur", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.0532408841 + "score":0.2213152575 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"ur", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.0792848959 + "score":0.4821662369 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"vi", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mai", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2017921323 - }, - { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.2758120522 + "score":0.6 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.2703866537 + "model":"google\/gemma-3-27b-it", + "bcp_47":"mai", + "task":"classification", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.3348006046 + "model":"google\/gemma-3-27b-it", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"wuu", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mai", + "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"wuu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.0272258942 + "score":0.3126340837 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"wuu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.0804193027 + "score":0.5601639768 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"wuu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.0083321447 + "score":0.1875297747 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"wuu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.01475986 + "score":0.4394137195 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"yue", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"yue", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"mg", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.2502298144 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"yue", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.0562014737 + "score":0.5206889602 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"yue", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.0400341425 + "score":0.1301910408 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"yue", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.0738358517 + "score":0.4488625613 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"zh", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"zh", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"zh", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.2809005667 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"zh", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.5466717628 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"zh", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.2287455417 }, { - "model":"google\/gemini-2.5-pro-preview", - "bcp_47":"zh", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.0 + "score":0.4915489263 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"aeb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"aeb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.0278093886 + "score":0.3395095603 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"aeb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.1209403877 + "score":0.5877742809 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"aeb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.0460469917 + "score":0.2222923122 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"aeb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.1583874959 + "score":0.4572688692 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"af", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ak", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", + "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ak", - "task":"mgsm", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", + "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ak", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.0746358572 + "score":0.3794800258 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ak", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.2039454163 + "score":0.6256125923 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ak", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.0106555853 + "score":0.3593747877 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ak", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.1635891431 + "score":0.664135376 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"am", + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"am", + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"am", + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.0987853648 + "score":0.251920694 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"am", + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.2172965208 + "score":0.4662583176 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"am", + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.0037326563 + "score":0.1647980206 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"am", + "model":"google\/gemma-3-27b-it", + "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.0273289802 + "score":0.4166796691 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"apc", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"apc", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ne", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.0349474752 + "score":0.3022338928 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"apc", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.205929391 + "score":0.5587522289 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"apc", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.0109943205 + "score":0.2252421952 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"apc", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.1135056868 + "score":0.4768786292 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"nl", "task":"classification", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"nl", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.0231513455 + "score":0.2793746981 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.1669816885 + "score":0.5246312011 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.0192052173 + "score":0.3496466203 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ar", + "model":"google\/gemma-3-27b-it", + "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.1199941852 + "score":0.6032151622 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ary", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ny", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.0482644911 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.1570610957 + "score":0.5 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.0 + "model":"google\/gemma-3-27b-it", + "bcp_47":"ny", + "task":"classification", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.0671140896 + "model":"google\/gemma-3-27b-it", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"arz", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ny", + "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"arz", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0333540125 + "score":0.119086784 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"arz", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.1654807425 + "score":0.3898511388 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"arz", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0300458941 + "score":0.0923649849 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"arz", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.1417391491 + "score":0.4837931302 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"as", + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"as", + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"as", + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0153668157 + "score":0.0417850648 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"as", + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.1119995149 + "score":0.2509675066 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"as", + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0988733729 + "score":0.0312813941 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"as", + "model":"google\/gemma-3-27b-it", + "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.2096173766 + "score":0.2886309955 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"awa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"awa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"awa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.0696643378 + "score":0.2613495089 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"awa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.1931858455 + "score":0.5009335042 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"awa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.0309816185 + "score":0.1648455996 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"awa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.1018697927 + "score":0.3943041737 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"az", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"az", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"az", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.0634083235 + "score":0.4164890636 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"az", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.1819427877 + "score":0.6375470445 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"az", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.0728528728 + "score":0.4166823661 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"az", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.2068099712 + "score":0.575314128 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"be", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"pl", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"be", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"be", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pl", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.0726648889 + "score":0.2913506513 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"be", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.2430791182 + "score":0.5431985912 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"be", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.0528414924 + "score":0.3723742743 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"be", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.1724803068 + "score":0.5891983505 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bho", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ps", + "task":"arc", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bho", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1903946317 + "model":"google\/gemma-3-27b-it", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.3145924631 + "model":"google\/gemma-3-27b-it", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":0.4 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.0 + "model":"google\/gemma-3-27b-it", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.0310651444 + "model":"google\/gemma-3-27b-it", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bm", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pt", + "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bm", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.340245547 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bm", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.5681284927 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bm", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.4702737577 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bm", + "model":"google\/gemma-3-27b-it", + "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.0 + "score":0.6903236014 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ro", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ro", + "task":"mmlu", + "metric":"accuracy", "score":0.3 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.0425897525 + "score":0.272965046 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.1329517484 + "score":0.5527916308 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.1071842556 + "score":0.5195197328 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"bn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.2292182808 + "score":0.6892729705 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ca", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ru", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ca", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.2862936285 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ca", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.5264436928 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ca", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.4800957551 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ca", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.0 + "score":0.6618495803 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ceb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ceb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ceb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.1295041316 + "score":0.1701995093 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ceb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.2647572228 + "score":0.4262662427 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ceb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.0551171959 + "score":0.1513262342 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ceb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.2019792449 + "score":0.4732082637 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ckb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sd", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ckb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ckb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.1181725504 + "score":0.2365858071 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ckb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.3417239653 + "score":0.4722212406 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ckb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.0030057917 + "score":0.2251623508 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ckb", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.050016858 + "score":0.4159341653 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"cs", + "model":"google\/gemma-3-27b-it", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"cs", + "model":"google\/gemma-3-27b-it", + "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"cs", + "model":"google\/gemma-3-27b-it", + "bcp_47":"si", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.1106488773 + "score":0.226689844 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"cs", + "model":"google\/gemma-3-27b-it", + "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.2661126449 + "score":0.4706510499 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"cs", + "model":"google\/gemma-3-27b-it", + "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.0566112691 + "score":0.2258552473 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"cs", + "model":"google\/gemma-3-27b-it", + "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.2161355093 + "score":0.4191499082 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", "score":0.3 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.1071586446 + "score":0.0756830418 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.2867516145 + "score":0.3184767575 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.1380160311 + "score":0.0495523985 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"de", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.3472666619 + "score":0.3971096934 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"el", + "model":"google\/gemma-3-27b-it", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"el", + "model":"google\/gemma-3-27b-it", + "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"el", + "model":"google\/gemma-3-27b-it", + "bcp_47":"so", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.1228102479 + "score":0.1915993132 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"el", + "model":"google\/gemma-3-27b-it", + "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.257059719 + "score":0.4208812642 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"el", + "model":"google\/gemma-3-27b-it", + "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.0709906394 + "score":0.1724511246 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"el", + "model":"google\/gemma-3-27b-it", + "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.1615768924 + "score":0.4741419887 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sr", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sr", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.3271580119 + "score":0.245439349 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.4059591673 + "score":0.536270172 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.4705724484 + "score":0.3929818488 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"en", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.5810510479 + "score":0.5787667028 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.0612440394 + "score":0.1784974236 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.214820597 + "score":0.4520828188 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.0718408174 + "score":0.1435021957 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"es", + "model":"google\/gemma-3-27b-it", + "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.2974455623 + "score":0.4868234587 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sv", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.1093307519 + "score":0.3258404036 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.2212819207 + "score":0.5652149653 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.0461058798 + "score":0.4264864443 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fa", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.1370841628 + "score":0.6543542662 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fil", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sw", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.1 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fil", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.1005924157 + "score":0.268709657 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fil", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.202708484 + "score":0.5306834056 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fil", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.0160833262 + "score":0.3395981599 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fil", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.1960110619 + "score":0.6130756934 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fr", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"sw", + "task":"truthfulqa", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fr", - "task":"mgsm", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ta", + "task":"arc", "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.1269813968 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.3065913664 + "score":0.7 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.1789987295 + "model":"google\/gemma-3-27b-it", + "bcp_47":"ta", + "task":"classification", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.3463028707 + "model":"google\/gemma-3-27b-it", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fuv", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ta", + "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fuv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.014745965 + "score":0.2563448403 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fuv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.0749091363 + "score":0.5007966916 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fuv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.2405135195 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"fuv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.0895914252 + "score":0.5564984925 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"gu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"gu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"gu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.097971755 + "score":0.2787922254 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"gu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.1777195033 + "score":0.5420797212 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"gu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.0318870555 + "score":0.259866454 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"gu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.1529629731 + "score":0.48543634 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ha", + "model":"google\/gemma-3-27b-it", + "bcp_47":"tg", "task":"classification", "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ha", + "model":"google\/gemma-3-27b-it", + "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.0369448389 + "score":0.2018690154 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ha", + "model":"google\/gemma-3-27b-it", + "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.1480560277 + "score":0.4335923466 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ha", + "model":"google\/gemma-3-27b-it", + "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.0919802251 + "score":0.0972794658 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ha", + "model":"google\/gemma-3-27b-it", + "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.2629251679 + "score":0.327182503 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hi", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"th", + "task":"arc", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hi", - "task":"mgsm", + "model":"google\/gemma-3-27b-it", + "bcp_47":"th", + "task":"classification", "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.2538230452 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.3759994265 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.0835774684 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.1904298158 + "model":"google\/gemma-3-27b-it", + "bcp_47":"th", + "task":"mgsm", + "metric":"accuracy", + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hne", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"th", + "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hne", + "model":"google\/gemma-3-27b-it", + "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.0800273183 + "score":0.2400131449 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hne", + "model":"google\/gemma-3-27b-it", + "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.2260677118 + "score":0.4851690277 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hne", + "model":"google\/gemma-3-27b-it", + "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.0220446366 + "score":0.383380628 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hne", + "model":"google\/gemma-3-27b-it", + "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.0997285023 + "score":0.5430720239 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ht", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ht", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.0736674948 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ht", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.2974206944 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ht", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0241026131 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ht", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.0 + "score":0.1246172628 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"tr", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"tr", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.0883989834 + "score":0.2757340333 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.206785454 + "score":0.5244536559 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.0790692679 + "score":0.3185578758 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"hu", + "model":"google\/gemma-3-27b-it", + "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.28304649 + "score":0.5765088485 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"id", + "model":"google\/gemma-3-27b-it", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"uk", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"id", + "model":"google\/gemma-3-27b-it", + "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"id", + "model":"google\/gemma-3-27b-it", + "bcp_47":"uk", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.0773479102 + "score":0.2918547905 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"id", + "model":"google\/gemma-3-27b-it", + "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.2587381281 + "score":0.5631912653 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"id", + "model":"google\/gemma-3-27b-it", + "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.0433423647 + "score":0.3283437369 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"id", + "model":"google\/gemma-3-27b-it", + "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.2817069358 + "score":0.5565790802 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ig", + "model":"google\/gemma-3-27b-it", + "bcp_47":"umb", "task":"classification", "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ig", + "model":"google\/gemma-3-27b-it", + "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0612163002 + "score":0.0359452883 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ig", + "model":"google\/gemma-3-27b-it", + "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.2381342912 + "score":0.1779043042 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ig", + "model":"google\/gemma-3-27b-it", + "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0426402139 + "score":0.0305779168 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ig", + "model":"google\/gemma-3-27b-it", + "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.1696737174 + "score":0.1838354035 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ilo", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ilo", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ilo", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.0970247032 + "score":0.2595582459 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ilo", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.2111718332 + "score":0.5043992681 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ilo", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.0050070462 + "score":0.2142625601 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ilo", + "model":"google\/gemma-3-27b-it", + "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.1683969558 + "score":0.429749938 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.139001615 + "score":0.2595944841 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.2668336456 + "score":0.5081810113 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.0475935646 + "score":0.2601189518 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"it", + "model":"google\/gemma-3-27b-it", + "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.2602541027 + "score":0.5225655991 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.0609909305 + "score":0.2279880384 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.1860226608 + "score":0.4835933272 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.0692256682 + "score":0.3478085621 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ja", + "model":"google\/gemma-3-27b-it", + "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.1449932716 + "score":0.5968604742 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"jv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"wo", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"jv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"jv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0495085803 + "score":0.065945115 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"jv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.1459303026 + "score":0.2358663461 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"jv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0098682093 + "score":0.0540055322 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"jv", + "model":"google\/gemma-3-27b-it", + "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.1292642853 + "score":0.2390749172 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ki", + "model":"google\/gemma-3-27b-it", + "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ki", + "model":"google\/gemma-3-27b-it", + "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.246042863 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ki", + "model":"google\/gemma-3-27b-it", + "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.4917114856 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ki", + "model":"google\/gemma-3-27b-it", + "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.1017188886 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ki", + "model":"google\/gemma-3-27b-it", + "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.0 + "score":0.1707828137 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"kk", + "model":"google\/gemma-3-27b-it", + "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"kk", + "model":"google\/gemma-3-27b-it", + "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"kk", + "model":"google\/gemma-3-27b-it", + "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.0827091376 + "score":0.167004472 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"kk", + "model":"google\/gemma-3-27b-it", + "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.2337306985 + "score":0.400944552 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"kk", + "model":"google\/gemma-3-27b-it", + "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.0492400989 + "score":0.066271851 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"kk", + "model":"google\/gemma-3-27b-it", + "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.155912411 + "score":0.3937495329 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"km", + "model":"google\/gemma-3-27b-it", + "bcp_47":"yo", + "task":"arc", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"km", + "model":"google\/gemma-3-27b-it", + "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"km", + "model":"google\/gemma-3-27b-it", + "bcp_47":"yo", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0860125484 + "score":0.0472060067 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"km", + "model":"google\/gemma-3-27b-it", + "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.1623830923 + "score":0.2924612708 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"km", + "model":"google\/gemma-3-27b-it", + "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0024514101 + "score":0.0525309984 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"km", + "model":"google\/gemma-3-27b-it", + "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.0781695805 + "score":0.2304098638 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"kn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"yue", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"kn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"kn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.0937784828 + "score":0.2124709579 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"kn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.2195953032 + "score":0.4811646042 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"kn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.0577688436 + "score":0.1392232 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"kn", + "model":"google\/gemma-3-27b-it", + "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.1510063998 + "score":0.2205120991 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zh", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.0680252346 + "score":0.1900086584 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.2091636742 + "score":0.4895930442 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.0555477009 + "score":0.2395565562 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ko", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.0549298013 + "score":0.3237759485 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"lua", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zu", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"lua", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zu", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"google\/gemma-3-27b-it", + "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.0036630043 + "score":0.1607803472 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"lua", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.099240382 + "score":0.4377738064 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"lua", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.0532977337 + "score":0.1538390263 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"lua", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.2153600503 + "score":0.4751516021 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mag", - "task":"classification", + "model":"google\/gemma-3-27b-it", + "bcp_47":"zu", + "task":"truthfulqa", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mag", + "model":"google\/translate-v2", + "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.1425685645 + "score":0.3397504765 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mag", + "model":"google\/translate-v2", + "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.3016796517 + "score":0.560449359 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mag", + "model":"google\/translate-v2", + "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.0118929014 + "score":0.278826715 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mag", + "model":"google\/translate-v2", + "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.0840825564 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.4815690002 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mai", + "model":"google\/translate-v2", + "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.115229855 + "score":0.2795001892 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mai", + "model":"google\/translate-v2", + "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.2180450998 + "score":0.5119662189 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mai", + "model":"google\/translate-v2", + "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0163179059 + "score":0.1719218154 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mai", + "model":"google\/translate-v2", + "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.0417840087 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.4619906072 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mg", + "model":"google\/translate-v2", + "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.0410364581 + "score":0.2955792162 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mg", + "model":"google\/translate-v2", + "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.2193286405 + "score":0.5460142346 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mg", + "model":"google\/translate-v2", + "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0422799242 + "score":0.2550661243 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mg", + "model":"google\/translate-v2", + "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.1425526516 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.3516234079 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ml", + "model":"google\/translate-v2", + "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.0630918824 + "score":0.3399225795 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ml", + "model":"google\/translate-v2", + "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.1710971727 + "score":0.5942330704 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ml", + "model":"google\/translate-v2", + "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.0386225926 + "score":0.2816905761 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ml", + "model":"google\/translate-v2", + "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.1445595523 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.5676475667 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mr", + "model":"google\/translate-v2", + "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.1084731704 + "score":0.3493985929 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mr", + "model":"google\/translate-v2", + "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.2498733805 + "score":0.5975748844 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mr", + "model":"google\/translate-v2", + "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.3475330474 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"mr", + "model":"google\/translate-v2", + "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.0399406962 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.5776317086 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ms", + "model":"google\/translate-v2", + "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.033427329 + "score":0.2092670256 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ms", + "model":"google\/translate-v2", + "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.1616696801 + "score":0.466339127 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ms", + "model":"google\/translate-v2", + "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.115786224 + "score":0.1728000073 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ms", + "model":"google\/translate-v2", + "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.3018549986 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.4285875773 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"my", + "model":"google\/translate-v2", + "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.2294643956 + "score":0.2747967998 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"my", + "model":"google\/translate-v2", + "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.3457333311 + "score":0.5090740494 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"my", + "model":"google\/translate-v2", + "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.1065767565 + "score":0.2401570931 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"my", + "model":"google\/translate-v2", + "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.2047469752 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.4670149488 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ne", + "model":"google\/translate-v2", + "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.0951128399 + "score":0.2674941424 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ne", + "model":"google\/translate-v2", + "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.2178534884 + "score":0.541891802 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ne", + "model":"google\/translate-v2", + "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.0398161454 + "score":0.1979823055 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ne", + "model":"google\/translate-v2", + "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.1412294262 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 + "score":0.4521218857 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"nl", + "model":"google\/translate-v2", + "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.0563796721 + "score":0.3768023433 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"nl", + "model":"google\/translate-v2", + "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.1918127836 + "score":0.6041064745 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"nl", + "model":"google\/translate-v2", + "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.0887408505 + "score":0.2943485815 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"nl", + "model":"google\/translate-v2", + "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.3444852597 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.5100804178 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ny", + "model":"google\/translate-v2", + "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.0322569074 + "score":0.2273493056 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ny", + "model":"google\/translate-v2", + "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.1681655692 + "score":0.4822061401 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ny", + "model":"google\/translate-v2", + "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.0133097605 + "score":0.1907459838 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ny", + "model":"google\/translate-v2", + "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.1890356187 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 + "score":0.4304499853 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"om", + "model":"google\/translate-v2", + "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.0380467697 + "score":0.2698506992 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"om", + "model":"google\/translate-v2", + "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.11665963 + "score":0.5332909304 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"om", + "model":"google\/translate-v2", + "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.0196821089 + "score":0.3805163094 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"om", + "model":"google\/translate-v2", + "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.1887330103 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.5444910857 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"or", + "model":"google\/translate-v2", + "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.0987048471 + "score":0.3207673833 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"or", + "model":"google\/translate-v2", + "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.2474195062 + "score":0.5578909014 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"or", + "model":"google\/translate-v2", + "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.0246921682 + "score":0.1931718671 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"or", + "model":"google\/translate-v2", + "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.1089265139 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.4102436779 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pa", + "model":"google\/translate-v2", + "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2220426626 + "score":0.2940937001 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pa", + "model":"google\/translate-v2", + "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.3598952341 + "score":0.5656960013 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pa", + "model":"google\/translate-v2", + "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.1611882376 + "score":0.4105743367 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pa", + "model":"google\/translate-v2", + "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.2396561128 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 + "score":0.582274226 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pl", + "model":"google\/translate-v2", + "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.0904332342 + "score":0.455106564 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pl", + "model":"google\/translate-v2", + "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.2068730334 + "score":0.6691241367 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pl", + "model":"google\/translate-v2", + "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.046065351 + "score":0.5332526559 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pl", + "model":"google\/translate-v2", + "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.1925827805 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 + "score":0.7086055004 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pt", + "model":"google\/translate-v2", + "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.1079576302 + "score":0.2827542245 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pt", + "model":"google\/translate-v2", + "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.2645017017 + "score":0.5361942504 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pt", + "model":"google\/translate-v2", + "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.0569295003 + "score":0.3935667187 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"pt", + "model":"google\/translate-v2", + "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.2615431845 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 + "score":0.6110160857 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ro", + "model":"google\/translate-v2", + "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.131593328 + "score":0.3504448262 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ro", + "model":"google\/translate-v2", + "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.3060223321 + "score":0.6171883377 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ro", + "model":"google\/translate-v2", + "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.0757294413 + "score":0.4493928736 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ro", + "model":"google\/translate-v2", + "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.2150897212 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.0 + "score":0.6238587383 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ru", + "model":"google\/translate-v2", + "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.0926690761 + "score":0.4085073951 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ru", + "model":"google\/translate-v2", + "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.2397253952 + "score":0.6323019852 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ru", + "model":"google\/translate-v2", + "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.1044259789 + "score":0.5247754427 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ru", + "model":"google\/translate-v2", + "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.236302624 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 + "score":0.7142099767 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"rw", + "model":"google\/translate-v2", + "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.0153852885 + "score":0.3267673394 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"rw", + "model":"google\/translate-v2", + "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.1605190155 + "score":0.5607576056 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"rw", + "model":"google\/translate-v2", + "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.0600552316 + "score":0.3745254965 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"rw", + "model":"google\/translate-v2", + "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.2182309051 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 + "score":0.5676283692 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sd", + "model":"google\/translate-v2", + "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.120327349 + "score":0.6256942034 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sd", + "model":"google\/translate-v2", + "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.2042820289 + "score":0.7540191814 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sd", + "model":"google\/translate-v2", + "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.0309396387 + "score":0.70888051 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sd", + "model":"google\/translate-v2", + "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.1043645846 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.8684926816 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"si", + "model":"google\/translate-v2", + "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.0861987344 + "score":0.350374858 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"si", + "model":"google\/translate-v2", + "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.2079338406 + "score":0.5997407835 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"si", + "model":"google\/translate-v2", + "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.0196434254 + "score":0.3746387789 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"si", + "model":"google\/translate-v2", + "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.0610643505 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.6101421618 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sn", + "model":"google\/translate-v2", + "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.0320247445 + "score":0.3295944742 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sn", + "model":"google\/translate-v2", + "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.2176774161 + "score":0.5865092795 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sn", + "model":"google\/translate-v2", + "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.0700005142 + "score":0.2930219204 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sn", + "model":"google\/translate-v2", + "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.1952413901 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.5297678901 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"so", + "model":"google\/translate-v2", + "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.0252575003 + "score":0.4111750064 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"so", + "model":"google\/translate-v2", + "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.157057904 + "score":0.6314637291 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"so", + "model":"google\/translate-v2", + "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.0629444831 + "score":0.4316058282 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"so", + "model":"google\/translate-v2", + "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.1833979697 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 + "score":0.6598411557 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sr", + "model":"google\/translate-v2", + "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.0868186974 + "score":0.3779603397 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sr", + "model":"google\/translate-v2", + "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.2586575848 + "score":0.6286963509 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sr", + "model":"google\/translate-v2", + "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.1093378877 + "score":0.5835846952 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sr", + "model":"google\/translate-v2", + "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.2502315423 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.7468900473 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"su", + "model":"google\/translate-v2", + "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0782352762 + "score":0.1585736619 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"su", + "model":"google\/translate-v2", + "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.2378174872 + "score":0.3595908619 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"su", + "model":"google\/translate-v2", + "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0338179306 + "score":0.0281783964 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"su", + "model":"google\/translate-v2", + "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.2284098709 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 + "score":0.2135990911 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sv", + "model":"google\/translate-v2", + "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.1179976804 + "score":0.3338203117 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sv", + "model":"google\/translate-v2", + "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.2633320797 + "score":0.550900416 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sv", + "model":"google\/translate-v2", + "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.0494945427 + "score":0.2235904654 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sv", + "model":"google\/translate-v2", + "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.2411403917 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 + "score":0.4889537149 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sw", + "model":"google\/translate-v2", + "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.1282740208 + "score":0.3014462049 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sw", + "model":"google\/translate-v2", + "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.2521886667 + "score":0.5332346012 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sw", + "model":"google\/translate-v2", + "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.0748708455 + "score":0.2941569015 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"sw", + "model":"google\/translate-v2", + "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.2420611826 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.5452786239 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ta", + "model":"google\/translate-v2", + "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.1073230839 + "score":0.432237812 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ta", + "model":"google\/translate-v2", + "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.1798254279 + "score":0.6378291521 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ta", + "model":"google\/translate-v2", + "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.0317538654 + "score":0.4001439439 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ta", + "model":"google\/translate-v2", + "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.2038003362 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 + "score":0.6257483281 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"te", + "model":"google\/translate-v2", + "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.1951561832 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"te", + "model":"google\/translate-v2", + "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.3697480899 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"te", + "model":"google\/translate-v2", + "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.1899598166 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"te", + "model":"google\/translate-v2", + "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.2894416622 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"tg", + "model":"google\/translate-v2", + "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.0653258846 + "score":0.3369838412 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"tg", + "model":"google\/translate-v2", + "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.2205970586 + "score":0.564308487 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"tg", + "model":"google\/translate-v2", + "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.0222271283 + "score":0.4344044669 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"tg", + "model":"google\/translate-v2", + "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.120000505 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 + "score":0.6654473209 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"th", + "model":"google\/translate-v2", + "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.1262768628 + "score":0.3943233817 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"th", + "model":"google\/translate-v2", + "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.2570885899 + "score":0.6168947522 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"th", + "model":"google\/translate-v2", + "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.0575688551 + "score":0.5116660025 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"th", + "model":"google\/translate-v2", + "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.1582234491 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.730698655 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ti", + "model":"google\/translate-v2", + "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.3708158915 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ti", + "model":"google\/translate-v2", + "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.062613791 + "score":0.5993132477 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ti", + "model":"google\/translate-v2", + "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.0059519046 + "score":0.2705214178 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ti", + "model":"google\/translate-v2", + "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.0274291267 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 + "score":0.504548883 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"tr", + "model":"google\/translate-v2", + "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.0337787053 + "score":0.3900081426 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"tr", + "model":"google\/translate-v2", + "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.1824544035 + "score":0.6052122639 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"tr", + "model":"google\/translate-v2", + "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.1405643754 + "score":0.3336718595 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"tr", + "model":"google\/translate-v2", + "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.281387297 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.5750387432 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uk", + "model":"google\/translate-v2", + "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.1091041441 + "score":0.3278554945 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uk", + "model":"google\/translate-v2", + "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.2184696361 + "score":0.5743022789 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uk", + "model":"google\/translate-v2", + "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.1508474535 + "score":0.3394020951 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uk", + "model":"google\/translate-v2", + "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.234056096 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.0 + "score":0.5938537899 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"umb", + "model":"google\/translate-v2", + "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.0058005929 + "score":0.3409989486 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"umb", + "model":"google\/translate-v2", + "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.0841095708 + "score":0.5959288844 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"umb", + "model":"google\/translate-v2", + "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.3711980077 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"umb", + "model":"google\/translate-v2", + "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.1328221884 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.4848412412 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ur", + "model":"google\/translate-v2", + "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.1386027419 + "score":0.3673114251 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ur", + "model":"google\/translate-v2", + "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.3084595255 + "score":0.6183652016 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ur", + "model":"google\/translate-v2", + "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.0167447692 + "score":0.3709969529 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"ur", + "model":"google\/translate-v2", + "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.0960347815 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.598464243 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uz", + "model":"google\/translate-v2", + "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.0342337572 + "score":0.3570145905 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uz", + "model":"google\/translate-v2", + "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.1153353221 + "score":0.5887718416 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uz", + "model":"google\/translate-v2", + "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.0694684605 + "score":0.3713033391 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"uz", + "model":"google\/translate-v2", + "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.2369528571 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.604716209 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"vi", + "model":"google\/translate-v2", + "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.1615259525 + "score":0.3851664104 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"vi", + "model":"google\/translate-v2", + "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.2808117936 + "score":0.6312237305 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"vi", + "model":"google\/translate-v2", + "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.1251511875 + "score":0.1874455996 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"vi", + "model":"google\/translate-v2", + "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.3058766946 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.4426393743 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"wo", + "model":"google\/translate-v2", + "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.0065958006 + "score":0.3113098415 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"wo", + "model":"google\/translate-v2", + "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.0642484091 + "score":0.5501022834 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"wo", + "model":"google\/translate-v2", + "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.0043862302 + "score":0.3347737931 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"wo", + "model":"google\/translate-v2", + "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.1616631945 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.0 + "score":0.5576944014 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"wuu", + "model":"google\/translate-v2", + "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.0666328001 + "score":0.2822808126 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"wuu", + "model":"google\/translate-v2", + "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.2282939681 + "score":0.5526101149 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"wuu", + "model":"google\/translate-v2", + "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.0063550665 + "score":0.2612977966 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"wuu", + "model":"google\/translate-v2", + "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.0402314549 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 + "score":0.3457225363 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"xh", + "model":"google\/translate-v2", + "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.0731036446 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"xh", + "model":"google\/translate-v2", + "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.1810894045 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"xh", + "model":"google\/translate-v2", + "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0117190064 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"xh", + "model":"google\/translate-v2", + "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.1631103727 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yo", + "model":"google\/translate-v2", + "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.058648798 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yo", + "model":"google\/translate-v2", + "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.162917811 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yo", + "model":"google\/translate-v2", + "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.0647492243 + "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yo", + "model":"google\/translate-v2", + "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.1522863767 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yue", + "model":"google\/translate-v2", + "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.1133445002 + "score":0.3348942842 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yue", + "model":"google\/translate-v2", + "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.2630654346 + "score":0.5861344551 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yue", + "model":"google\/translate-v2", + "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.0177899901 + "score":0.1311732143 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"yue", + "model":"google\/translate-v2", + "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.0452074918 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 + "score":0.4350789061 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zh", + "model":"google\/translate-v2", + "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.0949407188 + "score":0.2903894802 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zh", + "model":"google\/translate-v2", + "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.2042376654 + "score":0.5623472971 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zh", + "model":"google\/translate-v2", + "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.2190660395 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zh", + "model":"google\/translate-v2", + "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.0169223477 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.5006362228 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zu", + "model":"google\/translate-v2", + "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.0689235431 + "score":0.339831623 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zu", + "model":"google\/translate-v2", + "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.2000811037 + "score":0.590846484 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zu", + "model":"google\/translate-v2", + "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.0413489342 + "score":0.3016318322 }, { - "model":"google\/gemini-2.5-pro-preview-05-06", - "bcp_47":"zu", + "model":"google\/translate-v2", + "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.1904482997 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 + "score":0.5461894184 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"aeb", + "model":"google\/translate-v2", + "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2073802913 + "score":0.3491068707 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"aeb", + "model":"google\/translate-v2", + "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.4889223975 + "score":0.5803894973 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"aeb", + "model":"google\/translate-v2", + "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.0840656979 + "score":0.3274616019 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"aeb", + "model":"google\/translate-v2", + "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.3453561943 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 + "score":0.5109521029 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"af", + "model":"google\/translate-v2", + "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.3962757824 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"af", + "model":"google\/translate-v2", + "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.6224286451 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"af", + "model":"google\/translate-v2", + "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.5032472209 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"af", + "model":"google\/translate-v2", + "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 + "score":0.7257127115 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", + "model":"google\/translate-v2", + "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.120094546 + "score":0.3361081405 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", + "model":"google\/translate-v2", + "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.3259782194 + "score":0.5602875655 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", + "model":"google\/translate-v2", + "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.0974181135 + "score":0.282320421 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", + "model":"google\/translate-v2", + "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.3477814679 + "score":0.4830195157 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":1.0 + "model":"google\/translate-v2", + "bcp_47":"ne", + "task":"translation_from", + "metric":"bleu", + "score":0.339447252 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 + "model":"google\/translate-v2", + "bcp_47":"ne", + "task":"translation_from", + "metric":"chrf", + "score":0.5865985454 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 + "model":"google\/translate-v2", + "bcp_47":"ne", + "task":"translation_to", + "metric":"bleu", + "score":0.3120075365 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 + "model":"google\/translate-v2", + "bcp_47":"ne", + "task":"translation_to", + "metric":"chrf", + "score":0.5354876043 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", + "model":"google\/translate-v2", + "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.2393172056 + "score":0.3500838996 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", + "model":"google\/translate-v2", + "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.4971254293 + "score":0.5820135911 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", + "model":"google\/translate-v2", + "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.2089212841 + "score":0.3833463355 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", + "model":"google\/translate-v2", + "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.3406916002 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 + "score":0.630764328 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"apc", + "model":"google\/translate-v2", + "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.2712045148 + "score":0.2752866209 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"apc", + "model":"google\/translate-v2", + "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.5477096036 + "score":0.5470670325 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"apc", + "model":"google\/translate-v2", + "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.178052271 + "score":0.2362408388 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"apc", + "model":"google\/translate-v2", + "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.468064885 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 + "score":0.5649412405 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", + "model":"google\/translate-v2", + "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.2747843596 + "score":0.2628008901 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", + "model":"google\/translate-v2", + "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.5519960681 + "score":0.5081811686 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", + "model":"google\/translate-v2", + "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.2863967069 + "score":0.0993493347 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", + "model":"google\/translate-v2", + "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.5318173199 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 + "score":0.456860433 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ary", + "model":"google\/translate-v2", + "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.1096694862 + "score":0.3258874325 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ary", + "model":"google\/translate-v2", + "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.4291604898 + "score":0.5886625327 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ary", + "model":"google\/translate-v2", + "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.1630720543 + "score":0.321631251 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3952400339 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1892846534 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4212342522 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1938470016 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4527968539 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2094379574 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4509809217 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.1931386564 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.4233010233 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2957522582 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5232039352 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.2308361669 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.4087255612 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.200456445 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4226152307 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1414132922 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4170843853 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.18522743 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4467570037 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2590661095 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4657468506 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2663307677 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.519985227 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1913577407 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.4064669591 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.271237739 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5173954387 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3067537945 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5194482945 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.3694979709 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.59081536 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.374702944 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.6019503341 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.2792699678 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.5157552806 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.2334415639 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.5128705295 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3536861453 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.6024608455 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.4031829559 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.6234553711 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3189602129 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5548503533 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.5229096392 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.7023434262 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.3137252517 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5147981205 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3302929673 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.505425141 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.3889146477 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.628092835 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.4660772497 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7280386297 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3593767686 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5668073679 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3662275621 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6102640711 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.286051969 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5373856549 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1816947237 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.3981159206 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3579818144 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5889481625 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.3403832088 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.590264879 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3180384008 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5571267732 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.5778354146 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.7490356238 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0380719948 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2066039108 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0259757351 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.147148937 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.34811918 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5482709715 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1618983325 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4411905252 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1925315551 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4032389241 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.1992720083 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.4905441802 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3722751955 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6207213131 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3378499277 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.554090013 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2879989689 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.5083598943 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1719225434 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3992950999 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.3234067809 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5706707095 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3187264685 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.5856828402 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3583744222 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5732194975 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3364664006 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6438910651 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.225000401 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.4567066441 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1941055199 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4504811493 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1944477164 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4517028309 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.2035517344 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.489419705 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.3074361781 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5178180754 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3505959215 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5955060476 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2581140706 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5395853617 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.3298839393 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4471547552 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3177915441 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5387853038 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2549228547 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5322440265 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1983700044 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4843458319 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.2920207746 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.5345155349 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3702042307 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5776853975 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.1498433716 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3652702605 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2858443353 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.5499221943 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.2481102245 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4829685786 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2305492704 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4826740501 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.1746024172 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3073554703 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0913600379 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.3305636235 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0269728382 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.3155017027 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3751831337 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.6085851316 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.259988405 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.5046714005 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.306099972 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5370842801 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.1736022871 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4305653856 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2826629018 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.5215979873 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.238462643 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.5334745774 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.3161992509 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5479755911 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2133071404 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.4660281027 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2370074805 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.4804215458 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2399769139 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4726429935 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3104483533 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5705763492 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.4229626959 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6856510383 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.2227645269 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.4888582617 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.1869632744 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.4322398057 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.3115387303 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5342290246 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.22081567 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4878836055 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.3125704924 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5397676594 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3586968371 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.6075205554 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.1901221224 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.438728736 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.1330024304 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.4711022084 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0258426139 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.2237241232 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0460531144 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.3418147419 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.3714452662 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5977153904 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.2234825764 - }, - { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.4562477173 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.5362369434 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4180718844 + "score":0.4482674529 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6426219278 + "score":0.671945393 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.3938693136 + "score":0.5160129517 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5573992167 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.6445374779 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.2456102401 + "score":0.3157581247 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.5190609119 + "score":0.5682347228 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.4265619216 + "score":0.3771434243 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.6320824157 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.6045220423 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3537745123 + "score":0.3489983932 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5904429929 + "score":0.5800455435 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4588664196 + "score":0.5650298473 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6844540285 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.7365285421 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.3139442337 + "score":0.3390274579 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.5741447282 + "score":0.592940935 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.5016049999 + "score":0.5470657372 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.6788048008 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 + "score":0.7020072444 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2449777422 + "score":0.3013142128 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5268764903 + "score":0.5656623498 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.3789708434 + "score":0.5178438056 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5790333031 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 + "score":0.6867971436 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.1839360587 + "score":0.3354195212 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.448997409 + "score":0.5741211618 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.1891835724 + "score":0.3462677897 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.508623725 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.5833767681 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.3352727297 + "score":0.3412028977 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.5583215205 + "score":0.5889369863 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.2009000601 + "score":0.3852572206 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.4004383195 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 + "score":0.5784169857 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.2297304995 + "score":0.2698751119 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.5040607132 + "score":0.5340401081 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.2136543311 + "score":0.4096366215 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.3916393466 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 + "score":0.5525266748 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.1221415503 + "score":0.256568307 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.3516954503 + "score":0.4900607089 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.108688779 + "score":0.1305127177 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.4515663403 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 + "score":0.4214140091 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.2308889646 + "score":0.3156233999 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.4618048204 + "score":0.5490670273 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.1667302795 + "score":0.1872166048 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.4717296026 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.4920219369 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.2351861569 + "score":0.321113344 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.5329036218 + "score":0.5864222708 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.4168384094 + "score":0.4884555721 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.6032787874 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.6556325596 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.2291561983 + "score":0.352233215 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.4673987803 + "score":0.5470765309 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.2036733766 + "score":0.1904552367 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.5047620958 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 + "score":0.4562964405 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.3500384253 + "score":0.3843214006 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5797456052 + "score":0.6136910044 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.4268868445 + "score":0.4689482853 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.664863412 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.6877930778 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.3005035588 + "score":0.4161610215 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.5210660172 + "score":0.6455274177 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.3401968092 + "score":0.5117468349 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.6217197146 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.7232937985 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.2662307086 + "score":0.3318364746 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.5053585639 + "score":0.5738929543 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.3044345778 + "score":0.3895968702 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.5714036731 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.6277778554 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3330093484 + "score":0.4077995927 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5694168709 + "score":0.6409257804 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.257812168 + "score":0.485273374 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5167002436 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.2176170344 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4496406258 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.2605536967 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4816066849 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.6710025354 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.2992360169 + "score":0.3269754516 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.5585599708 + "score":0.5639027355 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.4269079012 + "score":0.465639801 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.5719718715 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 + "score":0.597395155 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.2042995208 + "score":0.2639320429 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.416626147 + "score":0.5034191891 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.0833250166 + "score":0.2139020366 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.1982489294 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.3242506245 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.2832304201 + "score":0.3693666266 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.5467240003 + "score":0.6134245868 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.3273464288 + "score":0.3936947375 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.5827048506 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 + "score":0.6497039072 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2813742416 + "score":0.2907704167 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.5413704266 + "score":0.5698553329 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.4063054094 + "score":0.4401277302 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.6020718231 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0495917134 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1711087397 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0358872001 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.2761667256 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.6278421339 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.322448107 + "score":0.3080488172 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5887654616 + "score":0.5695112482 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2148139783 + "score":0.3188563568 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4226865444 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 + "score":0.5116789278 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.2829644119 + "score":0.273125871 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.5194956482 + "score":0.5508470442 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.2911955464 + "score":0.3030324343 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.5560139888 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 + "score":0.5648891805 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2745000434 + "score":0.3603047797 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5206422805 + "score":0.5927521365 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.4261790941 + "score":0.4395780689 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.6358462464 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.6552870615 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0696458062 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.2461140434 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0728989985 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.2267265908 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.2072386748 + "score":0.3662176152 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.4628288648 + "score":0.5856640284 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1471425714 + "score":0.1500486487 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1971299212 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 + "score":0.2162606152 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.253783308 + "score":0.3163153725 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.4487387303 + "score":0.5712728237 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.0662544821 + "score":0.1386875315 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.3784904721 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0887390501 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.3201148841 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.1178050815 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.307433063 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 + "task":"translation_to", + "metric":"chrf", + "score":0.469849511 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", + "model":"google\/translate-v2", + "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.2253512269 + "score":0.1925403782 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", + "model":"google\/translate-v2", + "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.4949150094 + "score":0.4228528325 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", + "model":"google\/translate-v2", + "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.1897306863 + "score":0.0437670613 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", + "model":"google\/translate-v2", + "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.2561574259 + "score":0.2053727616 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 + "model":"google\/translate-v2", + "bcp_47":"yue", + "task":"translation_from", + "metric":"bleu", + "score":0.2311956685 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 + "model":"google\/translate-v2", + "bcp_47":"yue", + "task":"translation_from", + "metric":"chrf", + "score":0.5099861434 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 + "model":"google\/translate-v2", + "bcp_47":"yue", + "task":"translation_to", + "metric":"bleu", + "score":0.281120015 }, { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 + "model":"google\/translate-v2", + "bcp_47":"yue", + "task":"translation_to", + "metric":"chrf", + "score":0.3263629293 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2201641871 + "score":0.3441147842 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5051068628 + "score":0.6121137924 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2508351517 + "score":0.4192686299 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3110461024 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 + "score":0.4558586669 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.252616884 + "score":0.353693059 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.4822778382 + "score":0.607730412 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.1940901676 + "score":0.3303018306 }, { - "model":"google\/gemini-flash-1.5", + "model":"google\/translate-v2", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.4661416094 + "score":0.5960312224 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.2041309024 + "score":0.0750313913 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.4630820951 + "score":0.2689370364 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.1453469275 + "score":0.1097950919 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.3874336138 + "score":0.2459305972 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", + "score":0.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.1059711376 + "score":0.04291871 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.300568481 + "score":0.1992226055 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0110669593 + "score":0.0012692029 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.2141540563 + "score":0.1055962738 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.1906134629 + "score":0.0135029462 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.4467868389 + "score":0.1510010912 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.2001643223 + "score":0.0033288372 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.3225170104 + "score":0.0389705109 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.1876459632 + "score":0.0592251547 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.4830875841 + "score":0.2921276604 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.1830944017 + "score":0.0366276845 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.4175337587 + "score":0.2393327958 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.260683336 + "score":0.0837265107 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5383651277 + "score":0.3012065838 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.2962406565 + "score":0.1120670716 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5190026627 + "score":0.2771296913 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.1438491224 + "score":0.0213908698 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.418499848 + "score":0.2206299292 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1044262978 + "score":0.0285726559 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.3577242047 + "score":0.2326683564 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.1675595946 + "score":0.0290259599 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.4187188467 + "score":0.2238098591 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.1545869288 + "score":0.0597935462 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.4031218248 + "score":0.2388670431 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.1892328534 + "score":0.0254218054 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.4434206925 + "score":0.1827114877 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.1070430926 + "score":0.0012328171 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.3318636339 + "score":0.1200208328 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.3180630074 + "score":0.0888105743 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.5401606876 + "score":0.2644372522 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.1993490206 + "score":0.0668939667 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.4215901923 + "score":0.2520473985 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.1604267099 + "score":0.0148972561 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.4139767864 + "score":0.2336350172 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.1651025864 + "score":0.0047574121 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.3978212407 + "score":0.1922357185 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.19213953 + "score":0.0366814427 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.4512512424 + "score":0.2731193887 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.2406657525 + "score":0.0173677773 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.4536513075 + "score":0.2136838993 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.2519150677 + "score":0.0581882104 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.4966963131 + "score":0.2702416532 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.2057435019 + "score":0.0562052656 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.4016427491 + "score":0.2181774858 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2183929994 + "score":0.021858254 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.4877941086 + "score":0.2327499821 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.2700916391 + "score":0.0253088472 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5150577414 + "score":0.1918662187 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.2900668497 + "score":0.1238388635 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.5379961095 + "score":0.3442960257 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.3193377157 + "score":0.0243559813 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.5978978692 + "score":0.2777667131 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.2457083208 + "score":0.0155834504 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.4937183307 + "score":0.1833348617 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.1424911854 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.3546559531 + "score":0.1279598659 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.2481120403 + "score":0.1272159331 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.5339550423 + "score":0.3713000806 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.3131426524 + "score":0.1249879163 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.5548197404 + "score":0.332725923 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3440655166 + "score":0.1784955678 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.563902418 + "score":0.4008799371 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4305522274 + "score":0.1782663616 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6477508732 + "score":0.4070825897 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.2080428665 + "score":0.0708485888 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.4376921278 + "score":0.2987254392 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.3106300811 + "score":0.0335582401 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.4971105137 + "score":0.2213744022 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.4113463435 + "score":0.2797104835 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6260248317 + "score":0.4601471921 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.5310035709 + "score":0.3392404298 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7595845064 + "score":0.5518674496 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3031284355 + "score":0.1682956348 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5241309352 + "score":0.3705606944 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3289699508 + "score":0.1595479626 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.5811203167 + "score":0.4040956812 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.2483616515 + "score":0.0333586544 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.5338391625 + "score":0.2582740293 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.1651229998 + "score":0.0187498765 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.3923963113 + "score":0.2265924477 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fil", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.3057177881 + "score":0.1122756663 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.5523945263 + "score":0.3654994366 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.2711892461 + "score":0.0565617503 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.5802332073 + "score":0.2914640343 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.3349110908 + "score":0.1362099506 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.564806297 + "score":0.4039646029 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.4574014191 + "score":0.2151785904 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.6626552528 + "score":0.4309035319 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0488154154 + "score":0.0308954874 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.1944904286 + "score":0.1818653 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0115014356 + "score":0.0168607588 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.1082073343 + "score":0.1570666495 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.2954497906 + "score":0.0426900866 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.5086877895 + "score":0.2179546047 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.1582270271 + "score":0.0232938459 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4201411039 + "score":0.2007105106 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.8 + "score":0.1 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.1697291765 + "score":0.0254280801 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.4073157654 + "score":0.1890793851 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.139672818 + "score":0.0144953008 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.4450194819 + "score":0.1884647114 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3184042229 + "score":0.1658307051 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5712698408 + "score":0.3676480008 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3659029431 + "score":0.1711676323 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5796195236 + "score":0.360371738 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hne", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.2791185419 + "score":0.0316097931 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.5226615992 + "score":0.2354084259 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.145820804 + "score":0.0324437189 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.3820953887 + "score":0.2310907497 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.2313008892 + "score":0.1200885566 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.4915348458 + "score":0.3203716958 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.2765024802 + "score":0.1112414449 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.5261755337 + "score":0.3175953836 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"id", + "task":"classification", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.2301748885 + "score":0.1399603895 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.4682741896 + "score":0.3604113675 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3346592082 + "score":0.1312450274 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6514874668 + "score":0.3988827234 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.154261694 + "score":0.0365286922 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.3957095627 + "score":0.1990661561 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.1177946719 + "score":0.0199391634 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.341868335 + "score":0.1497878674 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.2239397579 + "score":0.0448341108 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.4512212104 + "score":0.252793155 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.135748348 + "score":0.0162978516 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.4408716957 + "score":0.2157203276 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.2724260509 + "score":0.0992729275 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.5200202435 + "score":0.3331935567 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.2753225284 + "score":0.1507170285 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5457466615 + "score":0.4062498972 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2664966821 + "score":0.1222242366 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5386982677 + "score":0.3415186168 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.2651365589 + "score":0.1607198845 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.409095006 + "score":0.2706371796 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.2326358655 + "score":0.0550016522 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.4815897231 + "score":0.2468917982 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2187928356 + "score":0.017690474 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.4896578943 + "score":0.2536001746 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.1666068635 + "score":0.0436064509 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.4554883841 + "score":0.2149063632 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.1547742726 + "score":0.0078784523 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.4534139462 + "score":0.1640159488 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.2647824193 + "score":0.0075610365 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.5269086196 + "score":0.1887566902 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.1635334444 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.3431273828 + "score":0.0847212314 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.247746183 + "score":0.0397152583 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.506339637 + "score":0.2037873099 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.1775009719 + "score":0.0137546968 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.4300321597 + "score":0.1710566423 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.2402657185 + "score":0.0564413481 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.497198112 + "score":0.3212656259 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.3100527074 + "score":0.0674768387 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.3696197774 + "score":0.1657544717 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.0872330227 + "score":0.037013395 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.3059813913 + "score":0.2338208281 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0010116202 + "score":0.0011308834 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.1893341465 + "score":0.1078312824 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.3447519877 + "score":0.068704384 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.578789784 + "score":0.2935036324 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.2508560655 + "score":0.0888104824 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.4987822313 + "score":0.2740386167 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.2732982319 + "score":0.039812774 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.5233285219 + "score":0.2668100649 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.137657899 + "score":0.0365460997 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.3935929024 + "score":0.2177619304 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.2082275626 + "score":0.0221745742 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.4639776287 + "score":0.1989640913 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.1350252624 + "score":0.016161822 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.4834543859 + "score":0.1752502983 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.2600500491 + "score":0.0532822129 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.5029669853 + "score":0.2301007371 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.2091322046 + "score":0.022639121 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.4711774201 + "score":0.175884413 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2256246926 + "score":0.0411296617 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.4703189943 + "score":0.2442292695 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2254492518 + "score":0.0482775033 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.447826525 + "score":0.2211003771 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.2986303081 + "score":0.1375979502 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.5546917725 + "score":0.3518293272 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.3680194341 + "score":0.1462646527 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.6778287705 + "score":0.3886405702 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.1979480779 + "score":0.1 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.4791457508 + "score":0.2238699363 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.1536786708 + "score":0.1 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.4315811907 + "score":0.1606469353 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.2955515679 + "score":0.0498052059 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.5399574649 + "score":0.2651327526 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.2350766648 + "score":0.0286452462 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.4890671168 + "score":0.2023398596 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.2583853642 + "score":0.122262497 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.5143387984 + "score":0.3515428019 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.3585971813 + "score":0.1527676878 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.6255063069 + "score":0.4249238432 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.1562574059 + "score":0.0279631361 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.378833839 + "score":0.2321541854 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.1000795039 + "score":0.0195222834 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.447037349 + "score":0.1762015362 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0981161875 + "score":0.0127402107 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.3370208163 + "score":0.1651498064 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.040931235 + "score":0.0088808864 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.3615428475 + "score":0.1702476721 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.2626677598 + "score":0.0366762006 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.4970567085 + "score":0.1697470704 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.1807466012 + "score":0.0190635247 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.4219189716 + "score":0.1343627089 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3706063992 + "score":0.2292007848 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6167676482 + "score":0.3716655897 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4234596823 + "score":0.2126702079 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5629443923 + "score":0.3248815955 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.2754265608 + "score":0.1085949014 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.5207065369 + "score":0.3346783911 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.3700040895 + "score":0.1676106101 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.5924241261 + "score":0.3907504991 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.2971403532 + "score":0.1013445398 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5321068893 + "score":0.3063125264 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4364286549 + "score":0.2231857524 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6509885745 + "score":0.4401829864 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.27702997 + "score":0.0955350175 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.5437386483 + "score":0.3286351702 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.4580925611 + "score":0.1871630014 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.6514836722 + "score":0.3655570607 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2300270544 + "score":0.1005466956 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4839384065 + "score":0.3356518748 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.4236492288 + "score":0.1363185356 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.6116207052 + "score":0.3769111636 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.1602143293 + "score":0.0189997083 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.3793757948 + "score":0.1919557381 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.1370228414 + "score":0.0165320564 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.4797772284 + "score":0.1417103032 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.2571204202 + "score":0.0441421075 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.4946608155 + "score":0.1739314177 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.0869374651 + "score":0.0182129294 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.3119061498 + "score":0.119134604 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.1981443603 + "score":0.0302406554 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.4536105905 + "score":0.1787247799 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.2278086127 + "score":0.0014746217 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.4013315084 + "score":0.0931903615 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.0939343156 + "score":0.0192199252 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.3129229613 + "score":0.1851933727 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.074740365 + "score":0.016306816 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.4045773842 + "score":0.1413916659 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.2273071628 + "score":0.0299103049 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.4497946959 + "score":0.1986832691 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.1824497409 + "score":0.0138432 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.4972329945 + "score":0.1610036541 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.2541965029 + "score":0.1315135307 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.5234491687 + "score":0.3968249514 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.3308712415 + "score":0.1220351802 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.5512495988 + "score":0.3505317727 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.1522391036 + "score":0.0605189037 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.3508292995 + "score":0.2644052383 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.1514782919 + "score":0.0323301168 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.4909144205 + "score":0.2153999563 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.2959760233 + "score":0.1638179638 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5398896148 + "score":0.3812064776 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.4357891553 + "score":0.2333909009 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6529723913 + "score":0.4689649165 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.2501435914 + "score":0.0510060878 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.5088299265 + "score":0.2132741272 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.262372343 + "score":0.0089146903 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.5806899403 + "score":0.2314109768 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.2450100573 + "score":0.0303451125 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.4918691312 + "score":0.1748071119 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.2434733519 + "score":0.0076177075 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.5120095348 + "score":0.1986647775 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3568851036 + "score":0.1096331511 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5825326367 + "score":0.2888090685 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.3112091725 + "score":0.1206114883 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5431414206 + "score":0.2597106436 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tg", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.1741933649 + "score":0.0268228091 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.4272342177 + "score":0.1751009974 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.1811584685 + "score":0.0015866917 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.4001890626 + "score":0.1423262509 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.2544201673 + "score":0.0382115226 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.5081271409 + "score":0.2562543067 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.2942923294 + "score":0.0125933293 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.4479604827 + "score":0.1659603426 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.0881111208 + "score":0.0161407336 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.3173214379 + "score":0.1568869137 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.0384668791 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.1259439982 + "score":0.0241402636 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.2563119866 + "score":0.0907273605 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.5291012922 + "score":0.2666411269 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.3009595898 + "score":0.0495223383 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.5854044281 + "score":0.276468397 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2567288533 + "score":0.1408426214 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.5177571061 + "score":0.3768513401 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.3201007033 + "score":0.1029189854 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.5323037228 + "score":0.3606378352 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"umb", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0115980217 + "score":0.023064469 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.115883071 + "score":0.0922502173 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0129780747 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.1450749981 + "score":0.1015661134 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.2402951661 + "score":0.0891185343 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5033005385 + "score":0.281793335 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2302239803 + "score":0.0799576366 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4066956434 + "score":0.2661903898 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.203750264 + "score":0.0287030985 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.4979829233 + "score":0.2124751899 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.2162945849 + "score":0.0040250398 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.4941278712 + "score":0.1706379305 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2537752957 + "score":0.1389065496 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5073147534 + "score":0.3708687542 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.3583753747 + "score":0.1661316612 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.6253917282 + "score":0.354399593 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.1081430594 + "score":0.0242583204 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.2665454299 + "score":0.1929988599 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0142970887 + "score":0.012814538 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.1489810124 + "score":0.1551759179 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.1700904158 + "score":0.0973024735 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.421111634 + "score":0.3076531166 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1213993524 + "score":0.0161682999 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1655788185 + "score":0.0635837055 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.1531171972 + "score":0.0478834907 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.3828830786 + "score":0.208762819 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.0483942569 + "score":0.0095341532 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.3116951706 + "score":0.150635966 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0816098185 + "score":0.0080365175 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.2781732759 + "score":0.1647602539 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.041496472 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.202397124 + "score":0.0909640555 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.1783312983 + "score":0.1043191943 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.4423885999 + "score":0.3414129274 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1632112014 + "score":0.0651436117 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2297357227 + "score":0.1002582276 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.249810194 + "score":0.1133138775 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5151255506 + "score":0.3754190494 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.1854861198 + "score":0.1672636279 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.2463326959 + "score":0.232325354 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.1729786376 + "score":0.0206894768 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.4189697233 + "score":0.1952546166 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.0866404913 + "score":0.0113901513 }, { - "model":"google\/gemini-flash-1.5-8b", + "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.3943277627 + "score":0.1302687289 }, { - "model":"google\/gemma-3-27b-it", + "model":"gryphe\/mythomax-l2-13b", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.2177971147 + "score":0.2152631134 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.4738076987 + "score":0.4621033585 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.1414064724 + "score":0.1438913245 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.3965739567 + "score":0.3637806215 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.076595229 + "score":0.0488692805 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.2493366365 + "score":0.2406135335 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0440715947 + "score":0.0214880279 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.2820233612 + "score":0.2270659336 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.2 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.1913062339 + "score":0.1203676158 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.4296053228 + "score":0.3241911739 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.13443556 + "score":0.0165994228 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.2528930204 + "score":0.1058904177 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.24265587 + "score":0.2332719546 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.4918380331 + "score":0.4924788322 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.2104382871 + "score":0.1544598614 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.456050442 + "score":0.403814105 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ar", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2891206499 + "score":0.2609114367 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5438550217 + "score":0.5167379854 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.3184721364 + "score":0.1932802581 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5483731849 + "score":0.4648835751 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.1664804364 + "score":0.1270864308 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.4585261833 + "score":0.3882289796 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1299183594 + "score":0.1366193757 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.3752977557 + "score":0.3691581345 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.1618648119 + "score":0.1966421011 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.4104839109 + "score":0.4161867731 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.2266738862 + "score":0.1531147508 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.4315390742 + "score":0.3635575685 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.2602059805 + "score":0.152657571 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.4987515978 + "score":0.4132859119 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.0907943093 + "score":0.0663682991 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.348768221 + "score":0.3007728685 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.3277667824 + "score":0.2739426076 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.5267403611 + "score":0.5129388019 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.2123273366 + "score":0.1227671497 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.408906638 + "score":0.3306179967 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.1880331404 + "score":0.130005692 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.4234748209 + "score":0.3815764307 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.1517877566 + "score":0.136654027 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.423956163 + "score":0.3809883299 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.148851004 + "score":0.092815209 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.4514291775 + "score":0.4056492611 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.3105472783 + "score":0.2013843536 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.498684126 + "score":0.417738842 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.2430984589 + "score":0.2451140745 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.4969060141 + "score":0.4883780153 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.1699224465 + "score":0.1334730215 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.3964402252 + "score":0.363000921 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2907230812 + "score":0.1938367121 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5148223626 + "score":0.4674774016 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3473636391 + "score":0.2073541352 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5442574441 + "score":0.4438396219 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.3650597419 + "score":0.260902514 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.5512750223 + "score":0.4589913242 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.293824845 + "score":0.2033313823 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.5724817779 + "score":0.4899690932 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.1983726871 + "score":0.1866723671 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.4779908235 + "score":0.3936568086 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.0792877335 + "score":0.0494661624 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.3908004248 + "score":0.320713519 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.2863884915 + "score":0.2435247423 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.5641108436 + "score":0.501836375 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.352498756 + "score":0.1956638929 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.5860513143 + "score":0.4870538255 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"de", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3577876868 + "score":0.2685056004 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5943423055 + "score":0.506818165 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.5344280565 + "score":0.3801657831 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.7084649844 + "score":0.6056477234 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"el", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.2914236052 + "score":0.2197756902 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.5147962724 + "score":0.4571715629 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.3761179017 + "score":0.2911113336 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.5590147212 + "score":0.4641387139 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"arc", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.452427177 + "score":0.2216031518 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6525566656 + "score":0.4429472312 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.5467976399 + "score":0.3322887566 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7780833183 + "score":0.5824514758 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"arc", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3554331718 + "score":0.1640783778 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5997743406 + "score":0.434749516 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3847830842 + "score":0.3126220052 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6191109047 + "score":0.6038883227 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"fa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.2 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.2579194729 + "score":0.2147534918 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.5351839762 + "score":0.4874495537 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.1571676635 + "score":0.094833194 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.4046770996 + "score":0.3292486732 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fil", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.3771043132 + "score":0.3074477197 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.5835797455 + "score":0.5168064726 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.3564426025 + "score":0.2380541489 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.6107274367 + "score":0.552589393 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2846092378 + "score":0.2261588318 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5655970541 + "score":0.4944996319 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5192984544 + "score":0.4626001556 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7020040834 + "score":0.6614963779 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0220051815 + "score":0.0704357087 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.1861453784 + "score":0.214215478 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0227307294 + "score":0.0113955269 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.1850492522 + "score":0.1874434226 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.2796934014 + "score":0.2250377214 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.5155626456 + "score":0.4688956519 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.1979202011 + "score":0.1515937263 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4528880823 + "score":0.4251249067 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.2 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.1595296755 + "score":0.0957125553 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.3859356797 + "score":0.3135046613 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.171830216 + "score":0.1387309388 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.4608354018 + "score":0.383923158 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3520691191 + "score":0.3316295853 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6035990708 + "score":0.5594083443 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3987037224 + "score":0.3598049012 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6195037668 + "score":0.5828568956 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hne", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.2709410734 + "score":0.1166136282 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.4976144005 + "score":0.3855078109 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.1150407607 + "score":0.0887957809 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.3709160058 + "score":0.3187695245 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.2903150375 + "score":0.2237665442 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.5392715859 + "score":0.4896395702 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.3460432788 + "score":0.2614626337 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.6009670508 + "score":0.5280652466 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.3377417704 + "score":0.2236143729 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5674360496 + "score":0.4912948296 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3534620252 + "score":0.2910526755 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6680177029 + "score":0.6005590773 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.1600009223 + "score":0.0845382562 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.3857586031 + "score":0.3303135434 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.1602266912 + "score":0.1267144204 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.4091024664 + "score":0.3667670284 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.1546473042 + "score":0.1301757317 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.3985794204 + "score":0.3568142061 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.1752645287 + "score":0.1161068297 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.4668449261 + "score":0.4147684511 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.3356485456 + "score":0.2150043089 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.5684527887 + "score":0.4693148389 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.3627134123 + "score":0.2998342329 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.6050822949 + "score":0.5545377546 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2481856237 + "score":0.2131020144 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5180749152 + "score":0.4845704057 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.242508046 + "score":0.1446650781 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4046420215 + "score":0.2292145443 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3019627022 + "score":0.1840709267 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.5133980923 + "score":0.4042090141 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2316517545 + "score":0.1902389614 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5189963647 + "score":0.4796942089 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"kk", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.2187004813 + "score":0.1500855 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.4910590831 + "score":0.4100527329 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.2108939118 + "score":0.1136442629 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.4375825873 + "score":0.4236240472 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.3274744668 + "score":0.1899800627 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.5605813039 + "score":0.4618900518 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.0800539722 + "score":0.087831891 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.3336188156 + "score":0.265907742 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"kn", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.2692189197 + "score":0.1797031918 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.5290912174 + "score":0.446466319 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.2869741566 + "score":0.1846339038 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.5072256514 + "score":0.4220658756 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.2172591082 + "score":0.1932392069 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.478962626 + "score":0.4204852284 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.2169046229 + "score":0.0476115004 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.3151387909 + "score":0.0790735292 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.0905061152 + "score":0.0505101039 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.321707617 + "score":0.2502174391 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0361920973 + "score":0.0135029462 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.222315171 + "score":0.1682326163 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.3100950481 + "score":0.2778994313 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.558054933 + "score":0.534066621 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.2213152575 + "score":0.1661930328 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.4821662369 + "score":0.4001562798 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.3126340837 + "score":0.212877318 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.5601639768 + "score":0.4587224182 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.1875297747 + "score":0.0873487304 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.4394137195 + "score":0.3409755146 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.2502298144 + "score":0.1013985932 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.5206889602 + "score":0.3022487832 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.1301910408 + "score":0.0611289601 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.4488625613 + "score":0.3842495071 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.1 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.2809005667 + "score":0.2670990652 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.5466717628 + "score":0.5220692033 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.2287455417 + "score":0.1981851908 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.4915489263 + "score":0.4555314776 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.3395095603 + "score":0.2152417217 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5877742809 + "score":0.4578207034 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2222923122 + "score":0.1654073391 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4572688692 + "score":0.3941079443 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", - "task":"mgsm", + "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.3794800258 + "score":0.2763114217 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.6256125923 + "score":0.5353874356 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.3593747877 + "score":0.3315044625 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.664135376 + "score":0.6241092077 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.251920694 + "score":0.2083780287 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.4662583176 + "score":0.4466015977 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.1647980206 + "score":0.0879797246 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.4166796691 + "score":0.3441521948 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.3022338928 + "score":0.2353570133 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.5587522289 + "score":0.4846450712 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.2252421952 + "score":0.1632876087 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.4768786292 + "score":0.4158370821 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.2793746981 + "score":0.2136151785 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.5246312011 + "score":0.4670269701 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.3496466203 + "score":0.2202915792 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.6032151622 + "score":0.5233013945 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.119086784 + "score":0.0757269477 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.3898511388 + "score":0.2848998148 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0923649849 + "score":0.0329429353 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.4837931302 + "score":0.193397393 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0417850648 + "score":0.0206783974 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.2509675066 + "score":0.2005424268 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0312813941 + "score":0.0171071488 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.2886309955 + "score":0.239253642 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"or", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.2 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.2613495089 + "score":0.1677012885 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.5009335042 + "score":0.4316686173 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.1648455996 + "score":0.1202133569 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.3943041737 + "score":0.3816566526 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4164890636 + "score":0.1891048622 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6375470445 + "score":0.4238091524 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4166823661 + "score":0.2086330089 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.575314128 + "score":0.364326938 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.2913506513 + "score":0.1998083747 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.5431985912 + "score":0.4667760664 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.3723742743 + "score":0.3336132898 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.5891983505 + "score":0.5541703282 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.1 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.340245547 + "score":0.2256776552 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5681284927 + "score":0.4793769886 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4702737577 + "score":0.3320822339 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6903236014 + "score":0.6132478102 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.272965046 + "score":0.1978207058 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.5527916308 + "score":0.4682392821 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.5195197328 + "score":0.409911871 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.6892729705 + "score":0.6078072484 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2862936285 + "score":0.1730354472 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5264436928 + "score":0.454951133 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.4800957551 + "score":0.3073459183 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.6618495803 + "score":0.554890569 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.1701995093 + "score":0.0841096684 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.4262662427 + "score":0.2671346741 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.1513262342 + "score":0.0409000825 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.4732082637 + "score":0.3185422263 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.2365858071 + "score":0.1343511225 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.4722212406 + "score":0.385804118 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.2251623508 + "score":0.0662090824 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.4159341653 + "score":0.3030628402 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.226689844 + "score":0.1430431721 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.4706510499 + "score":0.3858131555 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.2258552473 + "score":0.1602151366 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.4191499082 + "score":0.357355981 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.0756830418 + "score":0.0399118136 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.3184767575 + "score":0.2285191544 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.0495523985 + "score":0.0269601697 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.3971096934 + "score":0.3006594703 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.1915993132 + "score":0.0705893599 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.4208812642 + "score":0.26834967 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.1724511246 + "score":0.0988956652 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.4741419887 + "score":0.3896116232 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.245439349 + "score":0.2014976842 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.536270172 + "score":0.470196154 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.3929818488 + "score":0.2922893702 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.5787667028 + "score":0.504298223 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.1784974236 + "score":0.1538893384 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.4520828188 + "score":0.4005055487 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.1435021957 + "score":0.123816759 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.4868234587 + "score":0.4480197519 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"sv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.3258404036 + "score":0.2739803344 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5652149653 + "score":0.5286596224 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.4264864443 + "score":0.3468145126 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6543542662 + "score":0.6078627072 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.268709657 + "score":0.1798653655 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.5306834056 + "score":0.4290347271 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.3395981599 + "score":0.1709237193 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.6130756934 + "score":0.522080463 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.2563448403 + "score":0.1405088565 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.5007966916 + "score":0.4088204523 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.2405135195 + "score":0.2360333676 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.5564984925 + "score":0.5038093602 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.2787922254 + "score":0.2507091988 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5420797212 + "score":0.5067409185 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.259866454 + "score":0.2998213366 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.48543634 + "score":0.5112516267 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tg", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.2018690154 + "score":0.1029119511 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.4335923466 + "score":0.37770665 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.0972794658 + "score":0.1507872741 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.327182503 + "score":0.3945276116 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"th", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.2400131449 + "score":0.2022308722 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.4851690277 + "score":0.4620737135 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.383380628 + "score":0.2592698778 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.5430720239 + "score":0.4253112122 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.0736674948 + "score":0.052152068 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.2974206944 + "score":0.2578037483 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.0241026131 + "score":0.0237309602 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.1246172628 + "score":0.0808540368 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.2757340333 + "score":0.2231619401 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.5244536559 + "score":0.4653230255 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.3185578758 + "score":0.2701457179 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.5765088485 + "score":0.5043061571 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2918547905 + "score":0.2437336637 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.5631912653 + "score":0.5042825637 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.3283437369 + "score":0.3231504544 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.5565790802 + "score":0.5440009645 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"umb", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0359452883 + "score":0.0461126383 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.1779043042 + "score":0.1577698173 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0305779168 + "score":0.0008007826 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.1838354035 + "score":0.1426831674 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.2595582459 + "score":0.194871957 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5043992681 + "score":0.4838690709 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2142625601 + "score":0.1908512613 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.429749938 + "score":0.3855814375 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.2595944841 + "score":0.177513842 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.5081810113 + "score":0.386230097 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.2601189518 + "score":0.2544813414 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.5225655991 + "score":0.4948262917 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", - "task":"mgsm", + "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2279880384 + "score":0.2003733128 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.4835933272 + "score":0.4613442635 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.3478085621 + "score":0.3326058501 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.5968604742 + "score":0.5496097026 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.065945115 + "score":0.0685259305 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.2358663461 + "score":0.234718691 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0540055322 + "score":0.0319335459 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.2390749172 + "score":0.1970529604 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.246042863 + "score":0.1538409962 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.4917114856 + "score":0.4398974306 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1017188886 + "score":0.0373503953 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1707828137 + "score":0.074080246 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.1 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.167004472 + "score":0.0707946688 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.400944552 + "score":0.2881582575 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.066271851 + "score":0.0204932467 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.3937495329 + "score":0.2690069759 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.2 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0472060067 + "score":0.0339322053 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.2924612708 + "score":0.2166922882 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0525309984 + "score":0.0169319822 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.2304098638 + "score":0.1617793532 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.2124709579 + "score":0.174955946 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.4811646042 + "score":0.4554476081 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1392232 + "score":0.1400846872 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2205120991 + "score":0.1809201358 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"arc", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1900086584 + "score":0.1869648376 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.4895930442 + "score":0.4664675781 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2395565562 + "score":0.1696786467 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3237759485 + "score":0.2325846099 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.1607803472 + "score":0.0515154857 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.4377738064 + "score":0.2819357103 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.1538390263 + "score":0.0804926199 }, { - "model":"google\/gemma-3-27b-it", + "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.4751516021 + "score":0.3473550746 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3-70b-instruct", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"aeb", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.3397504765 + "score":0.2561356588 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.560449359 + "score":0.49648558 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.278826715 + "score":0.2071032166 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.4815690002 + "score":0.4109686249 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"af", + "task":"classification", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"af", + "task":"translation_from", + "metric":"bleu", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"af", + "task":"translation_from", + "metric":"chrf", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"af", + "task":"translation_to", + "metric":"bleu", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"af", + "task":"translation_to", + "metric":"chrf", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ak", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ak", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.2795001892 + "score":0.1091965593 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.5119662189 + "score":0.3173514683 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.1719218154 + "score":0.0490503072 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.4619906072 + "score":0.2649288206 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"am", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"am", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"am", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.2955792162 + "score":0.1375609672 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.5460142346 + "score":0.3710873948 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.2550661243 + "score":0.0526682247 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.3516234079 + "score":0.1185738392 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"apc", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.3399225795 + "score":0.2624553878 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.5942330704 + "score":0.5118695802 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.2816905761 + "score":0.2175864677 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.5676475667 + "score":0.4641969296 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ar", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.3493985929 + "score":0.2766513185 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5975748844 + "score":0.5528193738 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.3475330474 + "score":0.3371708551 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5776317086 + "score":0.5607841978 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ary", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.2092670256 + "score":0.1412140088 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.466339127 + "score":0.4082636767 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1728000073 + "score":0.1613157357 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.4285875773 + "score":0.3896124669 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"arz", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.2747967998 + "score":0.2374604323 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.5090740494 + "score":0.4585263555 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.2401570931 + "score":0.2300327193 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.4670149488 + "score":0.43482663 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"as", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.2674941424 + "score":0.220118305 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.541891802 + "score":0.4767360664 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.1979823055 + "score":0.0623766799 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.4521218857 + "score":0.2986098722 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"awa", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.3768023433 + "score":0.3261928856 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.6041064745 + "score":0.5339015736 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.2943485815 + "score":0.2330787096 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.5100804178 + "score":0.4427484336 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"az", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.2273493056 + "score":0.1767618659 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.4822061401 + "score":0.4088166263 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.1907459838 + "score":0.1381246624 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.4304499853 + "score":0.3730280956 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"be", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.2698506992 + "score":0.161760748 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.5332909304 + "score":0.450732576 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.3805163094 + "score":0.2170520787 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.5444910857 + "score":0.4098923096 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.3207673833 + "score":0.2731033294 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"translation_from", + "metric":"chrf", + "score":0.5159041397 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bho", + "task":"translation_to", + "metric":"bleu", + "score":0.2075901182 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bho", + "task":"translation_to", + "metric":"chrf", + "score":0.4086862509 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bm", + "task":"classification", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bm", + "task":"translation_from", + "metric":"bleu", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bm", + "task":"translation_from", + "metric":"chrf", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bm", + "task":"translation_to", + "metric":"bleu", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bm", + "task":"translation_to", + "metric":"chrf", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bn", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bn", + "task":"translation_from", + "metric":"bleu", + "score":0.2623045124 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bn", + "task":"translation_from", + "metric":"chrf", + "score":0.512895511 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bn", + "task":"translation_to", + "metric":"bleu", + "score":0.3328087961 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"bn", + "task":"translation_to", + "metric":"chrf", + "score":0.5013967236 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ca", + "task":"classification", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ca", + "task":"translation_from", + "metric":"bleu", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ca", "task":"translation_from", "metric":"chrf", - "score":0.5578909014 + "score":0.0 }, { - "model":"google\/translate-v2", - "bcp_47":"bho", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ca", "task":"translation_to", "metric":"bleu", - "score":0.1931718671 + "score":0.0 }, { - "model":"google\/translate-v2", - "bcp_47":"bho", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ca", "task":"translation_to", "metric":"chrf", - "score":0.4102436779 + "score":0.0 }, { - "model":"google\/translate-v2", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2940937001 + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"google\/translate-v2", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5656960013 + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ceb", + "task":"classification", + "metric":"accuracy", + "score":0.8 }, { - "model":"google\/translate-v2", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.4105743367 + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 }, { - "model":"google\/translate-v2", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.582274226 + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.455106564 + "score":0.36364203 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.6691241367 + "score":0.5712362729 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.5332526559 + "score":0.2701190878 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.7086055004 + "score":0.551918321 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ckb", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.2827542245 + "score":0.1903904403 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.5361942504 + "score":0.4687496067 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.3935667187 + "score":0.0520773173 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.6110160857 + "score":0.3368230674 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"cs", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"cs", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.3504448262 + "score":0.3416129059 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.6171883377 + "score":0.5845038999 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.4493928736 + "score":0.2726323508 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.6238587383 + "score":0.5283879512 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"de", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"de", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.4085073951 + "score":0.3059254014 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.6323019852 + "score":0.5403551155 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.5247754427 + "score":0.4588037752 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.7142099767 + "score":0.6692431614 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"el", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"el", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.3267673394 + "score":0.268843518 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.5607576056 + "score":0.493449014 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.3745254965 + "score":0.3403168702 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.5676283692 + "score":0.5086292148 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"en", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"en", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"en", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.6256942034 + "score":0.5486108614 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.7540191814 + "score":0.6873139374 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.70888051 + "score":0.691450825 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.8684926816 + "score":0.8407064328 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"es", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"es", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"es", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.350374858 + "score":0.3021494986 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5997407835 + "score":0.5604145602 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3746387789 + "score":0.3865320679 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6101421618 + "score":0.6126903448 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.3295944742 + "score":0.3383579693 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.5865092795 + "score":0.5641679075 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.2930219204 + "score":0.2069613925 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.5297678901 + "score":0.4288746449 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fil", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fil", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.4111750064 + "score":0.3771163962 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.6314637291 + "score":0.5827777548 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.4316058282 + "score":0.3225619014 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.6598411557 + "score":0.5830090459 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.3779603397 + "score":0.2924853239 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.6286963509 + "score":0.5462367408 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5835846952 + "score":0.4963618411 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7468900473 + "score":0.682573515 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"fuv", + "task":"classification", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.1585736619 + "score":0.0727409119 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.3595908619 + "score":0.1973579541 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0281783964 + "score":0.0147511412 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.2135990911 + "score":0.1172295571 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"gu", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.3338203117 + "score":0.3220032872 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.550900416 + "score":0.537784261 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.2235904654 + "score":0.2206953431 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4889537149 + "score":0.4990640113 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ha", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ha", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ha", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ha", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.3014462049 + "score":0.1527043255 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.5332346012 + "score":0.4074071592 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.2941569015 + "score":0.1567942198 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.5452786239 + "score":0.4125213011 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"hi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"hi", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.432237812 + "score":0.3914590212 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6378291521 + "score":0.6128431348 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.4001439439 + "score":0.4189427376 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6257483281 + "score":0.6262550579 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"hne", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.2338808528 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.4544102907 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.1243598882 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", + "score":0.4051657211 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ht", + "task":"classification", + "metric":"accuracy", "score":0.0 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ht", + "task":"translation_from", + "metric":"bleu", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ht", + "task":"translation_from", + "metric":"chrf", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ht", + "task":"translation_to", + "metric":"bleu", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ht", + "task":"translation_to", + "metric":"chrf", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"hu", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"hu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.3369838412 + "score":0.2678903597 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.564308487 + "score":0.520149627 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.4344044669 + "score":0.3361911519 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.6654473209 + "score":0.5583527487 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"id", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.3943233817 + "score":0.290274553 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.6168947522 + "score":0.5143960108 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.5116660025 + "score":0.3789023659 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.730698655 + "score":0.6751523776 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ig", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ig", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ig", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.3708158915 + "score":0.1640789976 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.5993132477 + "score":0.4462743519 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.2705214178 + "score":0.1964520184 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.504548883 + "score":0.3861299089 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ilo", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.3900081426 + "score":0.1899251487 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.6052122639 + "score":0.428775702 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.3336718595 + "score":0.1861284915 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.5750387432 + "score":0.4568269097 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"it", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"it", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"it", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.3278554945 + "score":0.296179579 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.5743022789 + "score":0.5259369403 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.3394020951 + "score":0.3187240753 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5938537899 + "score":0.5786749514 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ja", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ja", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.3409989486 + "score":0.3306727326 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5959288844 + "score":0.5648258387 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.3711980077 + "score":0.262512317 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4848412412 + "score":0.4036795798 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3673114251 + "score":0.2593989014 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.6183652016 + "score":0.4825732152 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.3709969529 + "score":0.2177203514 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.598464243 + "score":0.452576603 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ki", + "task":"classification", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ki", + "task":"translation_from", + "metric":"bleu", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ki", + "task":"translation_from", + "metric":"chrf", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ki", + "task":"translation_to", + "metric":"bleu", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ki", + "task":"translation_to", + "metric":"chrf", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"kk", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.3570145905 + "score":0.2013677498 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.5887718416 + "score":0.4740327886 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.3713033391 + "score":0.2015887265 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.604716209 + "score":0.4935901226 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"km", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.3851664104 + "score":0.3098535214 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.6312237305 + "score":0.5740290935 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.1874455996 + "score":0.0913702814 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.4426393743 + "score":0.3128837987 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"kn", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.3113098415 + "score":0.2155836452 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.5501022834 + "score":0.4583181839 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.3347737931 + "score":0.2468017951 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.5576944014 + "score":0.4734415865 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ko", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ko", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.2822808126 + "score":0.2276258723 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.5526101149 + "score":0.4577745447 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.2612977966 + "score":0.3046437152 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.3457225363 + "score":0.3825631739 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"lua", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.1034813211 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.2900727397 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0357350273 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.0 + "score":0.2486408005 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mag", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.3838720489 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.5776118345 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.200683389 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", + "score":0.4892134584 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mai", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", "score":0.0 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.3348942842 + "score":0.2846718719 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.5861344551 + "score":0.5239506053 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.1311732143 + "score":0.135802286 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.4350789061 + "score":0.3994207414 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mg", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mg", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.2903894802 + "score":0.1470288737 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.5623472971 + "score":0.370597998 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.2190660395 + "score":0.0624257747 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.5006362228 + "score":0.3667603362 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ml", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.339831623 + "score":0.271926141 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.590846484 + "score":0.5318057054 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.3016318322 + "score":0.2579782083 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.5461894184 + "score":0.4806502272 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.3491068707 + "score":0.2987546297 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5803894973 + "score":0.526361525 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.3274616019 + "score":0.1515083487 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.5109521029 + "score":0.4332812085 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ms", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.3962757824 + "score":0.3458633411 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.6224286451 + "score":0.5703964991 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.5032472209 + "score":0.4050410338 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.7257127115 + "score":0.6535211779 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.3361081405 + "score":0.2598947984 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.5602875655 + "score":0.4881220333 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.282320421 + "score":0.2015903544 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.4830195157 + "score":0.4513077936 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ne", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ne", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.339447252 + "score":0.260901002 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.5865985454 + "score":0.4896606547 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.3120075365 + "score":0.1429173328 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.5354876043 + "score":0.4071639857 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"nl", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"nl", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"nl", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.3500838996 + "score":0.2472539314 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.5820135911 + "score":0.4972878378 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.3833463355 + "score":0.2865823477 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.630764328 + "score":0.5850706516 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ny", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ny", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.2752866209 + "score":0.0888165228 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.5470670325 + "score":0.2927704081 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.2362408388 + "score":0.0527161443 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.5649412405 + "score":0.3183888298 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"om", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"om", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.2628008901 + "score":0.024472012 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.5081811686 + "score":0.2143061298 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0993493347 + "score":0.0100859589 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.456860433 + "score":0.2528047704 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"or", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.3258874325 + "score":0.285587224 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.5886625327 + "score":0.5216659729 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.321631251 + "score":0.1281773816 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.5362369434 + "score":0.3720376754 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4482674529 + "score":0.4479091606 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.671945393 + "score":0.6272058507 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.5160129517 + "score":0.4155919737 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.6445374779 + "score":0.5685427433 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pl", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pl", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pl", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.3157581247 + "score":0.2568309796 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.5682347228 + "score":0.5058927884 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.3771434243 + "score":0.3323405641 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.6045220423 + "score":0.5569354008 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pt", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3489983932 + "score":0.3183331223 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5800455435 + "score":0.5559144449 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.5650298473 + "score":0.4500659682 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.7365285421 + "score":0.6761551234 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ro", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ro", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ro", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.3390274579 + "score":0.2803966495 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.592940935 + "score":0.5308215606 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.5470657372 + "score":0.5346576918 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.7020072444 + "score":0.7024180686 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ru", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ru", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.3013142128 + "score":0.2051262499 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5656623498 + "score":0.4837372958 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.5178438056 + "score":0.3699194641 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.6867971436 + "score":0.5770913921 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"rw", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"rw", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.3354195212 + "score":0.1806014296 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.5741211618 + "score":0.4291519278 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.3462677897 + "score":0.0966721561 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.5833767681 + "score":0.3133383199 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sd", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.3412028977 + "score":0.2666171334 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.5889369863 + "score":0.4765001737 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.3852572206 + "score":0.1168753501 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.5784169857 + "score":0.3522716786 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"si", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"si", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.2698751119 + "score":0.2270309753 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.5340401081 + "score":0.4841536531 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.4096366215 + "score":0.1352090178 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.5525266748 + "score":0.3326584955 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sn", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.256568307 + "score":0.067782587 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.4900607089 + "score":0.2594122638 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.1305127177 + "score":0.0496762437 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.4214140091 + "score":0.2555516699 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"so", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"so", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.3156233999 + "score":0.1103212906 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.5490670273 + "score":0.3317936338 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.1872166048 + "score":0.1178727843 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.4920219369 + "score":0.3706477532 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.321113344 + "score":0.2704960778 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.5864222708 + "score":0.5452851397 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.4884555721 + "score":0.395555296 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.6556325596 + "score":0.5799018584 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"su", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.352233215 + "score":0.253350788 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.5470765309 + "score":0.458172945 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.1904552367 + "score":0.1663559354 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.4562964405 + "score":0.4946292339 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sv", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.3843214006 + "score":0.3079953173 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.6136910044 + "score":0.5447429639 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.4689482853 + "score":0.3935950974 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6877930778 + "score":0.6342780862 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sw", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sw", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.4161610215 + "score":0.2983575506 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.6455274177 + "score":0.5283269577 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.5117468349 + "score":0.3087303367 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.7232937985 + "score":0.6028401745 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ta", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.3318364746 + "score":0.2210855899 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.5738929543 + "score":0.4616487061 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.3895968702 + "score":0.2867265678 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.6277778554 + "score":0.5646798034 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"te", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"te", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.4077995927 + "score":0.357367421 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.6409257804 + "score":0.5659157865 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.485273374 + "score":0.3515649071 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.6710025354 + "score":0.5645706042 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"tg", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"tg", + "task":"translation_from", + "metric":"bleu", + "score":0.1896909211 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"tg", + "task":"translation_from", + "metric":"chrf", + "score":0.4144350541 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"tg", + "task":"translation_to", + "metric":"bleu", + "score":0.202234159 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"tg", + "task":"translation_to", + "metric":"chrf", + "score":0.4284203038 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"th", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"th", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.3269754516 + "score":0.2422208114 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.5639027355 + "score":0.4745802383 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.465639801 + "score":0.3118363896 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.597395155 + "score":0.4892437324 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ti", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.2639320429 + "score":0.0689409767 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.5034191891 + "score":0.2722531305 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.2139020366 + "score":0.0406781073 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.3242506245 + "score":0.0970886698 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"tr", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"tr", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.3693666266 + "score":0.2772171859 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.6134245868 + "score":0.5118206984 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.3936947375 + "score":0.3271066365 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.6497039072 + "score":0.5803660329 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uk", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uk", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2907704167 + "score":0.2451818788 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.5698553329 + "score":0.4929183421 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.4401277302 + "score":0.2948409512 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.6278421339 + "score":0.5143250682 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"umb", + "task":"classification", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"umb", + "task":"translation_from", + "metric":"bleu", + "score":0.0677223644 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"umb", + "task":"translation_from", + "metric":"chrf", + "score":0.1661136189 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"umb", + "task":"translation_to", + "metric":"bleu", + "score":0.0419433658 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"umb", + "task":"translation_to", + "metric":"chrf", + "score":0.1567633534 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ur", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.3080488172 + "score":0.2584265792 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5695112482 + "score":0.5020697076 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.3188563568 + "score":0.2624388601 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.5116789278 + "score":0.4452300688 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uz", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.273125871 + "score":0.2640324297 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.5508470442 + "score":0.5196545965 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.3030324343 + "score":0.2748329219 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.5648891805 + "score":0.5341998684 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.3603047797 + "score":0.2872551102 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5927521365 + "score":0.5360250569 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.4395780689 + "score":0.3575088107 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.6552870615 + "score":0.5879974234 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"wo", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"wo", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.0892196115 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.0 + "score":0.266303191 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0392376693 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.0 + "score":0.1442620012 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"wuu", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.3662176152 + "score":0.2174626032 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.5856640284 + "score":0.4686565248 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1500486487 + "score":0.0941401506 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.2162606152 + "score":0.1690248565 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"xh", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"xh", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.3163153725 + "score":0.0921572696 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.5712728237 + "score":0.3072945662 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.1386875315 + "score":0.052810575 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.469849511 + "score":0.2387421258 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"yo", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"yo", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"yo", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"yo", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.1925403782 + "score":0.0706373037 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.4228528325 + "score":0.2741240176 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0437670613 + "score":0.0463945559 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.2053727616 + "score":0.1664955069 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"yue", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.2311956685 + "score":0.2098595469 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.5099861434 + "score":0.4603190119 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.281120015 + "score":0.0831701767 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.3263629293 + "score":0.2065328404 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"zh", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.3441147842 + "score":0.2619931658 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.6121137924 + "score":0.5251502482 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.4192686299 + "score":0.2725475868 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.4558586669 + "score":0.3148065512 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"zu", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"zu", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"zu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.353693059 + "score":0.1611133093 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.607730412 + "score":0.379825756 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.3303018306 + "score":0.0702261565 }, { - "model":"google\/translate-v2", + "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.5960312224 + "score":0.3706020457 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.1-70b-instruct", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.0750313913 + "score":0.2279903683 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.2689370364 + "score":0.4722573355 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.1097950919 + "score":0.1435174722 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.2459305972 + "score":0.3991235315 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.04291871 + "score":0.1058038471 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.1992226055 + "score":0.3048469769 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0012692029 + "score":0.0491812173 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.1055962738 + "score":0.2808449794 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.2 + "score":0.6 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.0135029462 + "score":0.1452228976 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.1510010912 + "score":0.3699202818 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0033288372 + "score":0.0630164833 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.0389705109 + "score":0.1755172285 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.0592251547 + "score":0.2933386948 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.2921276604 + "score":0.5447211689 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.0366276845 + "score":0.2166524228 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.2393327958 + "score":0.4496497227 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.0837265107 + "score":0.2997360932 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.3012065838 + "score":0.552448295 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.1120670716 + "score":0.2837522278 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.2771296913 + "score":0.5150814494 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.0213908698 + "score":0.1559168311 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.2206299292 + "score":0.4268663808 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.0285726559 + "score":0.1391913129 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.2326683564 + "score":0.3773300026 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.0290259599 + "score":0.1936957127 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.2238098591 + "score":0.4274660929 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.0597935462 + "score":0.2338775014 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.2388670431 + "score":0.4546098648 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.0254218054 + "score":0.2240336457 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.1827114877 + "score":0.456891102 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.0012328171 + "score":0.0249457171 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.1200208328 + "score":0.237299794 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.0888105743 + "score":0.3606359676 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.2644372522 + "score":0.5477908661 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.0668939667 + "score":0.1847580734 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.2520473985 + "score":0.3911269476 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.0148972561 + "score":0.1576463626 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.2336350172 + "score":0.3910005157 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.0047574121 + "score":0.1690495289 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.1922357185 + "score":0.3930640761 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.0366814427 + "score":0.1487255467 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.2731193887 + "score":0.4407404732 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.0173677773 + "score":0.2175383868 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.2136838993 + "score":0.4344749015 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.0581882104 + "score":0.3061950313 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.2702416532 + "score":0.5535821276 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.0562052656 + "score":0.1811004213 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.2181774858 + "score":0.3534531968 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.021858254 + "score":0.2646772038 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.2327499821 + "score":0.5157826791 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.0253088472 + "score":0.331255344 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.1918662187 + "score":0.5093924414 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.1238388635 + "score":0.3806413844 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.3442960257 + "score":0.5722776653 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.0243559813 + "score":0.2863154138 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.2777667131 + "score":0.5492472281 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.0155834504 + "score":0.181535472 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.1833348617 + "score":0.4498383877 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0712120544 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.1279598659 + "score":0.3404533027 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.1272159331 + "score":0.2928040954 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.3713000806 + "score":0.5416342014 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.1249879163 + "score":0.3623393932 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.332725923 + "score":0.5863625454 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.1784955678 + "score":0.3129672706 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.4008799371 + "score":0.5456543979 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.1782663616 + "score":0.4852409005 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.4070825897 + "score":0.6908376394 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.0708485888 + "score":0.2581336709 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.2987254392 + "score":0.4704308834 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.0335582401 + "score":0.3342775397 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.2213744022 + "score":0.5026657233 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.2797104835 + "score":0.5510215557 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.4601471921 + "score":0.6909834226 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.3392404298 + "score":0.6679215449 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.5518674496 + "score":0.823262947 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.1682956348 + "score":0.3139331841 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.3705606944 + "score":0.5454623234 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.1595479626 + "score":0.3561290923 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.4040956812 + "score":0.599796306 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.0333586544 + "score":0.3069040556 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.2582740293 + "score":0.5327832177 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.0187498765 + "score":0.2185015953 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.2265924477 + "score":0.4264089038 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fil", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.1122756663 + "score":0.3745780882 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.3654994366 + "score":0.5785175063 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.0565617503 + "score":0.2904415478 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.2914640343 + "score":0.5908280404 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.1362099506 + "score":0.2963449909 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.4039646029 + "score":0.5544997379 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.2151785904 + "score":0.5091700689 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.4309035319 + "score":0.690320784 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0308954874 + "score":0.0862885919 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.1818653 + "score":0.2372420697 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0168607588 + "score":0.0261732885 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.1570666495 + "score":0.168027641 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.0426900866 + "score":0.295613677 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.2179546047 + "score":0.5221513183 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.0232938459 + "score":0.1697878702 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.2007105106 + "score":0.4619303787 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.3 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.0254280801 + "score":0.1704488365 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.1890793851 + "score":0.4403441536 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.0144953008 + "score":0.1466885285 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.1884647114 + "score":0.4179596519 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.1658307051 + "score":0.385731086 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.3676480008 + "score":0.6034244629 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.1711676323 + "score":0.357812342 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.360371738 + "score":0.5894721809 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hne", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.0316097931 + "score":0.2543010782 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.2354084259 + "score":0.4974062413 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.0324437189 + "score":0.1038893205 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.2310907497 + "score":0.3250555425 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.1200885566 + "score":0.276319154 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.3203716958 + "score":0.5531277158 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.1112414449 + "score":0.3130631115 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.3175953836 + "score":0.576837791 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.1399603895 + "score":0.3143630083 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.3604113675 + "score":0.5621460006 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.1312450274 + "score":0.3372718385 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.3988827234 + "score":0.6543894215 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.0365286922 + "score":0.1641186812 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.1990661561 + "score":0.4109013799 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.0199391634 + "score":0.1375093856 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.1497878674 + "score":0.3810358014 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.0448341108 + "score":0.2236376263 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.252793155 + "score":0.4348813399 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.0162978516 + "score":0.1135128656 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.2157203276 + "score":0.4345857133 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.0992729275 + "score":0.2934122255 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.3331935567 + "score":0.5393871714 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.1507170285 + "score":0.3100077394 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.4062498972 + "score":0.5714730187 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.1222242366 + "score":0.284322765 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.3415186168 + "score":0.5304019177 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.1607198845 + "score":0.2212745751 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.2706371796 + "score":0.355950114 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.0550016522 + "score":0.2779520489 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.2468917982 + "score":0.5022141687 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.017690474 + "score":0.2476437073 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.2536001746 + "score":0.4805413308 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.0436064509 + "score":0.1839138217 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.2149063632 + "score":0.4860524069 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.0078784523 + "score":0.170404008 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.1640159488 + "score":0.4549282359 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.0075610365 + "score":0.2759135194 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.1887566902 + "score":0.5427685716 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.1237988917 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.0847212314 + "score":0.3265823778 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":0.1 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.0397152583 + "score":0.2379701997 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.2037873099 + "score":0.4807978998 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.0137546968 + "score":0.2068140088 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.1710566423 + "score":0.4615143451 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.0564413481 + "score":0.2508944927 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.3212656259 + "score":0.4998320266 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.0674768387 + "score":0.210142355 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.1657544717 + "score":0.3127505848 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.037013395 + "score":0.0890010757 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.2338208281 + "score":0.2623679578 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0011308834 + "score":0.0406768013 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.1078312824 + "score":0.2265562343 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.068704384 + "score":0.3934799806 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.2935036324 + "score":0.6000359011 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.0888104824 + "score":0.2497480714 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.2740386167 + "score":0.5149579975 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.039812774 + "score":0.2776870629 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.2668100649 + "score":0.5361437897 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.0365460997 + "score":0.101231398 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.2177619304 + "score":0.3867790942 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.6 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.0221745742 + "score":0.1710070826 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.1989640913 + "score":0.3961869922 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.016161822 + "score":0.0586978059 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.1752502983 + "score":0.3322172345 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.0532822129 + "score":0.2566290969 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.2301007371 + "score":0.5196341734 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.022639121 + "score":0.2701878605 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.175884413 + "score":0.4964908212 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.0411296617 + "score":0.3224419544 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.2442292695 + "score":0.5606155155 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.0482775033 + "score":0.1663847917 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.2211003771 + "score":0.4383662593 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.1375979502 + "score":0.3103894957 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.3518293272 + "score":0.5854645421 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.1462646527 + "score":0.3913336262 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.3886405702 + "score":0.6467989318 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.1 + "score":0.2392486974 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.2238699363 + "score":0.4920626101 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.1 + "score":0.2130836675 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.1606469353 + "score":0.4622075168 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.0498052059 + "score":0.2893481535 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.2651327526 + "score":0.5215715176 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.0286452462 + "score":0.1419402772 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.2023398596 + "score":0.4014256358 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.122262497 + "score":0.249611031 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.3515428019 + "score":0.4991029967 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.1527676878 + "score":0.2942599953 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.4249238432 + "score":0.5860228525 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.5 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0279631361 + "score":0.0973366086 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.2321541854 + "score":0.3232614896 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0195222834 + "score":0.0552969578 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.1762015362 + "score":0.3314346183 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.6 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0127402107 + "score":0.0356994946 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.1651498064 + "score":0.2438060785 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0088808864 + "score":0.0183592041 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.1702476721 + "score":0.3076327609 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.0366762006 + "score":0.2403245803 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.1697470704 + "score":0.5023246313 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.0190635247 + "score":0.1415388613 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.1343627089 + "score":0.4055113288 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.2292007848 + "score":0.4519562833 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.3716655897 + "score":0.6328740374 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.2126702079 + "score":0.3918546765 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.3248815955 + "score":0.551810658 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.1085949014 + "score":0.2474554181 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.3346783911 + "score":0.490135462 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.1676106101 + "score":0.3387973296 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.3907504991 + "score":0.5550868321 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.1013445398 + "score":0.3065957195 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.3063125264 + "score":0.5567333989 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.2231857524 + "score":0.4450524918 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.4401829864 + "score":0.6712742861 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.0955350175 + "score":0.2717880574 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.3286351702 + "score":0.5367497902 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.1871630014 + "score":0.4990357373 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.3655570607 + "score":0.6703309998 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.1005466956 + "score":0.2184907643 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.3356518748 + "score":0.4824691404 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.1363185356 + "score":0.3406091079 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.3769111636 + "score":0.5689518318 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.0189997083 + "score":0.1780482269 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.1919557381 + "score":0.4251975218 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.0165320564 + "score":0.1238628432 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.1417103032 + "score":0.3651594596 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.0441421075 + "score":0.2363136631 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.1739314177 + "score":0.4724628618 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.0182129294 + "score":0.1012580658 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.119134604 + "score":0.3226539734 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":0.1 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.0302406554 + "score":0.1702954814 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.1787247799 + "score":0.4314499751 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.0014746217 + "score":0.1165218233 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.0931903615 + "score":0.3219658957 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.0192199252 + "score":0.0415760658 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.1851933727 + "score":0.2726763268 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.016306816 + "score":0.059393934 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.1413916659 + "score":0.3265574234 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.0299103049 + "score":0.0974935448 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.1986832691 + "score":0.337395124 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.0138432 + "score":0.0786475166 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.1610036541 + "score":0.3616383914 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.1315135307 + "score":0.2468393111 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.3968249514 + "score":0.497224405 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.1220351802 + "score":0.4165200238 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.3505317727 + "score":0.60128551 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.0605189037 + "score":0.2331294709 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.2644052383 + "score":0.4605502791 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.0323301168 + "score":0.1664653883 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.2153999563 + "score":0.4762879225 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.1638179638 + "score":0.3181506443 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.3812064776 + "score":0.5441377883 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.2333909009 + "score":0.384958542 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.4689649165 + "score":0.6364660715 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.0510060878 + "score":0.2373393477 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.2132741272 + "score":0.5017230165 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.0089146903 + "score":0.2334583695 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.2314109768 + "score":0.5603415221 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":0.3 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.0303451125 + "score":0.2147204762 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.1748071119 + "score":0.4649686586 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.0076177075 + "score":0.2451735521 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.1986647775 + "score":0.5352886898 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.1096331511 + "score":0.3767393472 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.2888090685 + "score":0.5870286691 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.1206114883 + "score":0.3415510311 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.2597106436 + "score":0.5592933672 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tg", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.0268228091 + "score":0.1801710665 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.1751009974 + "score":0.4238537274 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.0015866917 + "score":0.1125461134 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.1423262509 + "score":0.3456654305 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"classification", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.0382115226 + "score":0.2385233061 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.2562543067 + "score":0.4987667959 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.0125933293 + "score":0.3202315883 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.1659603426 + "score":0.4748886274 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.0161407336 + "score":0.0653357736 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.1568869137 + "score":0.3152990905 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0468367135 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.0241402636 + "score":0.1314286197 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.0907273605 + "score":0.2786347493 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.2666411269 + "score":0.5143010521 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.0495223383 + "score":0.3361615644 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.276468397 + "score":0.5732166456 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.1408426214 + "score":0.2384384134 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.3768513401 + "score":0.4989290832 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.1029189854 + "score":0.4068700974 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.3606378352 + "score":0.5784786574 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"umb", "task":"classification", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.023064469 + "score":0.0295327628 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.0922502173 + "score":0.168725075 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.047075079 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.1015661134 + "score":0.2446929278 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.0891185343 + "score":0.252958993 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.281793335 + "score":0.5053979802 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.0799576366 + "score":0.2339821201 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.2661903898 + "score":0.425306622 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.0287030985 + "score":0.236632122 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.2124751899 + "score":0.4793696196 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.0040250398 + "score":0.265421135 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.1706379305 + "score":0.5116987882 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.1389065496 + "score":0.2721498467 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.3708687542 + "score":0.5200799335 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.1661316612 + "score":0.3613736416 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.354399593 + "score":0.5866656133 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0242583204 + "score":0.0887261142 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.1929988599 + "score":0.2752257416 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.012814538 + "score":0.0469317169 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.1551759179 + "score":0.2008453897 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.0973024735 + "score":0.1844305556 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.3076531166 + "score":0.4417239043 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.0161682999 + "score":0.1248600823 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.0635837055 + "score":0.1650750126 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.0478834907 + "score":0.0773908628 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.208762819 + "score":0.2972337309 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.0095341532 + "score":0.0211167911 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.150635966 + "score":0.2699477659 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.5 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0080365175 + "score":0.0578164805 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.1647602539 + "score":0.2633608218 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0591720568 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.0909640555 + "score":0.1753356197 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.1043191943 + "score":0.2018552397 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.3414129274 + "score":0.4546838419 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.0651436117 + "score":0.1477972133 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.1002582276 + "score":0.238559837 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"arc", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1133138775 + "score":0.2259180607 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.3754190494 + "score":0.5258178103 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.1672636279 + "score":0.2474954475 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.232325354 + "score":0.2841722148 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.0206894768 + "score":0.1511814979 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.1952546166 + "score":0.3667501588 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.0113901513 + "score":0.0635700737 }, { - "model":"gryphe\/mythomax-l2-13b", + "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.1302687289 + "score":0.3690469819 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-3.3-70b-instruct", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.2152631134 + "score":0.3304277157 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.4621033585 + "score":0.5443905094 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.1438913245 + "score":0.1851657228 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.3637806215 + "score":0.4035662808 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.0488692805 + "score":0.109249521 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.2406135335 + "score":0.3018012797 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0214880279 + "score":0.0601412463 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.2270659336 + "score":0.3378209538 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.1203676158 + "score":0.278050095 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.3241911739 + "score":0.5030892659 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0165994228 + "score":0.2056413876 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.1058904177 + "score":0.319475702 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.2332719546 + "score":0.3412628716 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.4924788322 + "score":0.6168944847 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.1544598614 + "score":0.2079777284 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.403814105 + "score":0.4737326062 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2609114367 + "score":0.3288891576 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5167379854 + "score":0.560493221 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.1932802581 + "score":0.3370289388 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.4648835751 + "score":0.5616376735 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.1270864308 + "score":0.1877842343 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.3882289796 + "score":0.4430965745 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1366193757 + "score":0.1822974398 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.3691581345 + "score":0.3877585115 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.1966421011 + "score":0.2820784803 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.4161867731 + "score":0.5005600008 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.1531147508 + "score":0.1762767818 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.3635575685 + "score":0.3980584269 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.152657571 + "score":0.2678950965 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.4132859119 + "score":0.5139106802 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.0663682991 + "score":0.1062771627 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.3007728685 + "score":0.3373123707 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"awa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.2739426076 + "score":0.3378377362 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.5129388019 + "score":0.5424277928 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.1227671497 + "score":0.2039436913 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.3306179967 + "score":0.413295677 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.130005692 + "score":0.2078798411 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.3815764307 + "score":0.4443945632 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.136654027 + "score":0.1864199422 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.3809883299 + "score":0.4220840798 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.092815209 + "score":0.1482839317 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.4056492611 + "score":0.4568926673 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.2013843536 + "score":0.2815774482 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.417738842 + "score":0.4716025494 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.2451140745 + "score":0.2239129937 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.4883780153 + "score":0.4981762083 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.1334730215 + "score":0.1952639614 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.363000921 + "score":0.40510597 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.1938367121 + "score":0.3130233588 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.4674774016 + "score":0.5569163893 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.2073541352 + "score":0.3322667951 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.4438396219 + "score":0.5140240989 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.260902514 + "score":0.3401088117 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.4589913242 + "score":0.5684561927 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.2033313823 + "score":0.4507021781 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.4899690932 + "score":0.6542740054 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.1866723671 + "score":0.2563123252 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.3936568086 + "score":0.517826657 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.0494661624 + "score":0.2261228199 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.320713519 + "score":0.4918700987 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.2435247423 + "score":0.3778406936 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.501836375 + "score":0.6293186521 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.1956638929 + "score":0.4207498261 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.4870538255 + "score":0.6224036774 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.2685056004 + "score":0.386239845 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.506818165 + "score":0.6110300223 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.3801657831 + "score":0.4508475568 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6056477234 + "score":0.6653203029 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.2197756902 + "score":0.3208174129 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.4571715629 + "score":0.5441131834 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.2911113336 + "score":0.3167289307 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.4641387139 + "score":0.5086510118 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.3 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.2216031518 + "score":0.5649266234 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.4429472312 + "score":0.7240609445 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.3322887566 + "score":0.6160790992 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.5824514758 + "score":0.8133281991 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.1640783778 + "score":0.3152773331 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.434749516 + "score":0.5548531112 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3126220052 + "score":0.3335962816 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6038883227 + "score":0.5859881472 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.2147534918 + "score":0.2974755741 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.4874495537 + "score":0.548510072 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.094833194 + "score":0.198877694 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.3292486732 + "score":0.4016676481 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.3074477197 + "score":0.3394346908 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.5168064726 + "score":0.5772205685 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.2380541489 + "score":0.2961737536 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.552589393 + "score":0.5732830973 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2261588318 + "score":0.3174175523 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.4944996319 + "score":0.5682518332 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.4626001556 + "score":0.5173973527 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.6614963779 + "score":0.7056428374 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0704357087 + "score":0.0282301718 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.214215478 + "score":0.2509358266 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0113955269 + "score":0.0302470726 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.1874434226 + "score":0.2143808411 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.2250377214 + "score":0.336179684 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.4688956519 + "score":0.5448761462 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.1515937263 + "score":0.1255076156 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4251249067 + "score":0.4301109075 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.0957125553 + "score":0.2176466652 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.3135046613 + "score":0.4472887488 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.1387309388 + "score":0.2048605344 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.383923158 + "score":0.4547067722 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3316295853 + "score":0.3796955055 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5594083443 + "score":0.6056742688 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3598049012 + "score":0.382370623 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5828568956 + "score":0.5922578575 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hne", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.1166136282 + "score":0.2583771315 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.3855078109 + "score":0.4968818998 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.0887957809 + "score":0.116404849 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.3187695245 + "score":0.355624191 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.2237665442 + "score":0.2893259192 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.4896395702 + "score":0.5696888872 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.2614626337 + "score":0.3664678104 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.5280652466 + "score":0.6152145331 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.2236143729 + "score":0.2523550022 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.4912948296 + "score":0.5458277736 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.2910526755 + "score":0.3774331947 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6005590773 + "score":0.6486159416 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.0845382562 + "score":0.2004372781 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.3303135434 + "score":0.4254008414 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.1267144204 + "score":0.2594342647 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.3667670284 + "score":0.4939201844 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.1301757317 + "score":0.1883645682 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.3568142061 + "score":0.4379611856 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.1161068297 + "score":0.166061461 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.4147684511 + "score":0.4725232576 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.2150043089 + "score":0.2923994901 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.4693148389 + "score":0.5447705341 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.2998342329 + "score":0.4037821428 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5545377546 + "score":0.6296260979 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2131020144 + "score":0.3130983776 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4845704057 + "score":0.5536124921 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.1446650781 + "score":0.3266051607 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.2292145443 + "score":0.4444671407 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.1840709267 + "score":0.3146151088 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.4042090141 + "score":0.5434123174 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.1902389614 + "score":0.3068652176 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.4796942089 + "score":0.5696595268 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"kk", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.1500855 + "score":0.2107380254 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.4100527329 + "score":0.4991705013 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.1136442629 + "score":0.331371608 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.4236240472 + "score":0.5825812793 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.1899800627 + "score":0.3610313078 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.4618900518 + "score":0.605032383 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.087831891 + "score":0.1827077293 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.265907742 + "score":0.4206815495 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.1797031918 + "score":0.273765965 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.446466319 + "score":0.5541955864 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.1846339038 + "score":0.2689874625 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.4220658756 + "score":0.5150630417 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.1932392069 + "score":0.2599758467 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.4204852284 + "score":0.4915631618 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.0476115004 + "score":0.1778560135 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.0790735292 + "score":0.2969773205 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.0505101039 + "score":0.0622795727 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.2502174391 + "score":0.2675764955 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0135029462 + "score":0.0381251381 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.1682326163 + "score":0.2812186233 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.2778994313 + "score":0.2963258613 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.534066621 + "score":0.5478698134 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.1661930328 + "score":0.2490501232 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.4001562798 + "score":0.470734921 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.212877318 + "score":0.2517024761 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.4587224182 + "score":0.5071223357 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.0873487304 + "score":0.1690507631 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.3409755146 + "score":0.4610312304 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.1013985932 + "score":0.245769233 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.3022487832 + "score":0.4441647844 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.0611289601 + "score":0.1869691691 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.3842495071 + "score":0.4993277276 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.2670990652 + "score":0.3697837131 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.5220692033 + "score":0.598263628 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.1981851908 + "score":0.2866761532 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.4555314776 + "score":0.5500074549 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2152417217 + "score":0.3125095049 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.4578207034 + "score":0.5535296132 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.1654073391 + "score":0.2275791183 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.3941079443 + "score":0.4569915545 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.2763114217 + "score":0.345465339 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.5353874356 + "score":0.5897544047 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.3315044625 + "score":0.4313143535 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.6241092077 + "score":0.672362003 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.2083780287 + "score":0.3075903861 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.4466015977 + "score":0.578349632 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.0879797246 + "score":0.1897910105 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.3441521948 + "score":0.4717519215 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.2353570133 + "score":0.3557303786 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.4846450712 + "score":0.5879008408 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.1632876087 + "score":0.1851047496 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.4158370821 + "score":0.4783025401 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.2136151785 + "score":0.2465270522 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.4670269701 + "score":0.5037852263 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.2202915792 + "score":0.3149743955 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.5233013945 + "score":0.5635296931 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0757269477 + "score":0.1800815958 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.2848998148 + "score":0.4077560746 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0329429353 + "score":0.0667158921 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.193397393 + "score":0.3678415876 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0206783974 + "score":0.0536067183 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.2005424268 + "score":0.2934406231 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0171071488 + "score":0.0212136796 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.239253642 + "score":0.3367052821 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.1677012885 + "score":0.2884546263 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.4316686173 + "score":0.5474826718 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.1202133569 + "score":0.2053276951 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.3816566526 + "score":0.4215800492 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.1891048622 + "score":0.4246522462 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.4238091524 + "score":0.6364852561 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.2086330089 + "score":0.439726575 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.364326938 + "score":0.5806732389 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.1998083747 + "score":0.2708043929 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.4667760664 + "score":0.5320865131 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.3336132898 + "score":0.2827359953 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.5541703282 + "score":0.5407300006 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.2256776552 + "score":0.3451568022 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.4793769886 + "score":0.5845365036 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.3320822339 + "score":0.4582156105 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6132478102 + "score":0.6491023878 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.6 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.1978207058 + "score":0.3130092522 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.4682392821 + "score":0.5728467895 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.409911871 + "score":0.503492173 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.6078072484 + "score":0.6731092747 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.1730354472 + "score":0.2567420946 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.454951133 + "score":0.5061211552 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.3073459183 + "score":0.4025875747 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.554890569 + "score":0.6083538055 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.0841096684 + "score":0.1596218382 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.2671346741 + "score":0.3642420922 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.0409000825 + "score":0.1505395069 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.3185422263 + "score":0.4131000516 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"sd", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.1343511225 + "score":0.3060556532 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.385804118 + "score":0.5236224481 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.0662090824 + "score":0.2303677631 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.3030628402 + "score":0.4550652237 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.1430431721 + "score":0.2505378464 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.3858131555 + "score":0.4777570712 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.1602151366 + "score":0.238193304 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.357355981 + "score":0.4255249112 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.0399118136 + "score":0.1176743345 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.2285191544 + "score":0.3404474685 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.0269601697 + "score":0.1289978109 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.3006594703 + "score":0.426186094 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.0705893599 + "score":0.2374825749 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.26834967 + "score":0.4257984603 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.0988956652 + "score":0.1278040594 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.3896116232 + "score":0.4360467159 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"sr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.2014976842 + "score":0.2878836927 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.470196154 + "score":0.5437304451 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.2922893702 + "score":0.4038152012 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.504298223 + "score":0.5978824564 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.1538893384 + "score":0.2373274152 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.4005055487 + "score":0.4608187705 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.123816759 + "score":0.1899355362 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.4480197519 + "score":0.4693159323 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.2739803344 + "score":0.298223826 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5286596224 + "score":0.5684457257 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.3468145126 + "score":0.3825937295 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6078627072 + "score":0.6393626909 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.1798653655 + "score":0.3425500041 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.4290347271 + "score":0.5680420054 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.1709237193 + "score":0.2980763573 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.522080463 + "score":0.5924685945 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.1405088565 + "score":0.2473591284 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.4088204523 + "score":0.5382867852 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.2360333676 + "score":0.3068229029 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.5038093602 + "score":0.5637367471 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.2507091988 + "score":0.3462186566 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5067409185 + "score":0.6055817314 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.2998213366 + "score":0.3822149946 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5112516267 + "score":0.5783618359 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"tg", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.1029119511 + "score":0.2141866571 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.37770665 + "score":0.4746491206 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.1507872741 + "score":0.2383903304 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.3945276116 + "score":0.4750617701 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.2022308722 + "score":0.2652851581 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.4620737135 + "score":0.5278626321 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.2592698778 + "score":0.3718263092 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.4253112122 + "score":0.5255136074 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.052152068 + "score":0.1999962108 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.2578037483 + "score":0.4183108341 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.0237309602 + "score":0.1133461632 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.0808540368 + "score":0.2113810541 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.2231619401 + "score":0.2978237586 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.4653230255 + "score":0.5152360665 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.2701457179 + "score":0.3447394658 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.5043061571 + "score":0.5850690403 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2437336637 + "score":0.2589937034 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.5042825637 + "score":0.5348601679 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.3231504544 + "score":0.3544986277 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.5440009645 + "score":0.5913345073 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.5 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0461126383 + "score":0.0650028377 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.1577698173 + "score":0.1912574022 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0008007826 + "score":0.0525305732 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.1426831674 + "score":0.2699878572 + }, + { + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.194871957 + "score":0.253549974 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.4838690709 + "score":0.5235064606 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.1908512613 + "score":0.2484556869 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.3855814375 + "score":0.429442787 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.177513842 + "score":0.2701010494 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.386230097 + "score":0.5241051692 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.2544813414 + "score":0.3181656056 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.4948262917 + "score":0.5907906511 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2003733128 + "score":0.2670924013 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.4613442635 + "score":0.5198891912 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.3326058501 + "score":0.3597766713 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.5496097026 + "score":0.6081806669 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0685259305 + "score":0.0741464388 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.234718691 + "score":0.2660826012 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0319335459 + "score":0.0695734356 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.1970529604 + "score":0.2997632689 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.1538409962 + "score":0.2653942694 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.4398974306 + "score":0.4893923691 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.0373503953 + "score":0.1157826458 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.074080246 + "score":0.1765005496 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.0707946688 + "score":0.1376742076 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.2881582575 + "score":0.3734228567 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.0204932467 + "score":0.0864151864 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.2690069759 + "score":0.4104899998 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0339322053 + "score":0.0937886749 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.2166922882 + "score":0.3196766983 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0169319822 + "score":0.1093519063 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.1617793532 + "score":0.3224056963 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"yue", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.174955946 + "score":0.2204568545 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.4554476081 + "score":0.4894165826 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1400846872 + "score":0.1569463992 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.1809201358 + "score":0.232785021 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":1.0 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1869648376 + "score":0.2703289724 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.4664675781 + "score":0.5473102513 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.1696786467 + "score":0.317769874 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.2325846099 + "score":0.3676906362 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.0515154857 + "score":0.2474518428 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.2819357103 + "score":0.5019415605 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.0804926199 + "score":0.19092807 }, { - "model":"meta-llama\/llama-3-70b-instruct", + "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.3473550746 + "score":0.4983188666 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"meta-llama\/llama-4-maverick", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.2561356588 + "score":0.1327783313 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.49648558 + "score":0.3636961218 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.2071032166 + "score":0.1557273583 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.4109686249 + "score":0.3515058711 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.1091965593 + "score":0.0375398146 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.3173514683 + "score":0.1986406573 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0490503072 + "score":0.0015655622 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.2649288206 + "score":0.0623964125 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.1375609672 + "score":0.0476085337 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.3710873948 + "score":0.205897506 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0526682247 + "score":0.0110136998 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.1185738392 + "score":0.0688367427 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.2624553878 + "score":0.0927070911 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.5118695802 + "score":0.3300356171 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.2175864677 + "score":0.1909661669 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.4641969296 + "score":0.4391780261 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2766513185 + "score":0.1548779531 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5528193738 + "score":0.3887963415 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.3371708551 + "score":0.268706305 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5607841978 + "score":0.4959259833 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.1412140088 + "score":0.0374286633 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.4082636767 + "score":0.276784029 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1613157357 + "score":0.1363017113 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.3896124669 + "score":0.3536429421 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.2374604323 + "score":0.106947781 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.4585263555 + "score":0.3169488071 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.2300327193 + "score":0.1479958867 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.43482663 + "score":0.3761797641 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.220118305 + "score":0.0312538317 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.4767360664 + "score":0.2922637643 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.0623766799 + "score":0.0234894436 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.2986098722 + "score":0.2225963414 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"awa", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"awa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.3261928856 + "score":0.2171579973 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.5339015736 + "score":0.456713607 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.2330787096 + "score":0.1681044686 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.4427484336 + "score":0.3440820027 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"az", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.1767618659 + "score":0.0298418752 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.4088166263 + "score":0.2580610439 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.1381246624 + "score":0.041095966 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.3730280956 + "score":0.2930729253 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.161760748 + "score":0.0556571943 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.450732576 + "score":0.3548784075 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.2170520787 + "score":0.066482798 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.4098923096 + "score":0.3295363828 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.2731033294 + "score":0.1569501012 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.5159041397 + "score":0.4039420627 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.2075901182 + "score":0.1151073387 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.4086862509 + "score":0.3126448605 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2623045124 + "score":0.095240952 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.512895511 + "score":0.3840548344 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3328087961 + "score":0.1876367188 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5013967236 + "score":0.4310988737 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ceb", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ceb", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.36364203 + "score":0.1916153649 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.5712362729 + "score":0.4266920518 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.2701190878 + "score":0.0532026402 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.551918321 + "score":0.2442653709 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.1903904403 + "score":0.0233838479 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.4687496067 + "score":0.2071232952 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.0520773173 + "score":0.0030055044 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.3368230674 + "score":0.1680771697 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.3416129059 + "score":0.1169562212 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.5845038999 + "score":0.4547809891 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.2726323508 + "score":0.2167266047 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.5283879512 + "score":0.4629060689 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3059254014 + "score":0.2546126219 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5403551155 + "score":0.4840060449 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4588037752 + "score":0.3171866034 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6692431614 + "score":0.5752285995 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"el", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.268843518 + "score":0.1236158233 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.493449014 + "score":0.3922493462 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.3403168702 + "score":0.1783375751 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.5086292148 + "score":0.4003787241 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.5486108614 + "score":0.3642023499 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6873139374 + "score":0.5697992815 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.691450825 + "score":0.4959810553 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.8407064328 + "score":0.7232313255 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3021494986 + "score":0.1732534835 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5604145602 + "score":0.4434970776 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3865320679 + "score":0.3605235101 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6126903448 + "score":0.5849733787 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.3383579693 + "score":0.0868573088 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.5641679075 + "score":0.3068232268 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.2069613925 + "score":0.0883262705 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.4288746449 + "score":0.3294670602 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.3771163962 + "score":0.1921595243 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.5827777548 + "score":0.4381909531 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.3225619014 + "score":0.2102834142 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.5830090459 + "score":0.503934087 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2924853239 + "score":0.1763652726 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5462367408 + "score":0.4428784232 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.4963618411 + "score":0.3772793055 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.682573515 + "score":0.5820724576 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0727409119 + "score":0.0067893116 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.1973579541 + "score":0.2007893146 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0147511412 + "score":0.0132875082 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.1172295571 + "score":0.1300679396 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.3220032872 + "score":0.1581139234 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.537784261 + "score":0.3992847318 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.2206953431 + "score":0.1006122628 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4990640113 + "score":0.3529986856 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.1527043255 + "score":0.0310807341 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.4074071592 + "score":0.2470826922 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.1567942198 + "score":0.0634023566 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.4125213011 + "score":0.2955476351 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3914590212 + "score":0.2788689746 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6128431348 + "score":0.5417455941 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.4189427376 + "score":0.2369610218 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6262550579 + "score":0.3765795877 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.2338808528 + "score":0.1094117889 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.4544102907 + "score":0.3715115564 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.1243598882 + "score":0.0627224628 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.4051657211 + "score":0.3120135336 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.2678903597 + "score":0.1148528139 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.520149627 + "score":0.3969632133 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.3361911519 + "score":0.2004789157 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.5583527487 + "score":0.4541545495 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.290274553 + "score":0.0855471394 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5143960108 + "score":0.3720740561 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3789023659 + "score":0.284365864 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6751523776 + "score":0.589202199 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.1640789976 + "score":0.0344910359 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.4462743519 + "score":0.2176253825 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.1964520184 + "score":0.0020930717 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.3861299089 + "score":0.0498013123 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.1899251487 + "score":0.0788515324 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.428775702 + "score":0.3023201397 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.1861284915 + "score":0.0177354807 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.4568269097 + "score":0.2429763441 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.296179579 + "score":0.1849581121 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.5259369403 + "score":0.4562979327 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.3187240753 + "score":0.2591551222 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5786749514 + "score":0.5311194854 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.3306727326 + "score":0.1462066826 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5648258387 + "score":0.4299215293 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.262512317 + "score":0.2371087689 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4036795798 + "score":0.4064089202 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.2593989014 + "score":0.1102793601 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.4825732152 + "score":0.3718502317 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2177203514 + "score":0.0999035402 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.452576603 + "score":0.3664761129 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.2013677498 + "score":0.0537848954 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.4740327886 + "score":0.3139411656 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.2015887265 + "score":0.0666822222 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.4935901226 + "score":0.3622322436 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.3098535214 + "score":0.0753739979 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.5740290935 + "score":0.3445771251 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.0913702814 + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.3128837987 + "score":0.1701733674 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.2155836452 + "score":0.104202131 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.4583181839 + "score":0.3590734072 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.2468017951 + "score":0.0705324379 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.4734415865 + "score":0.3025321109 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.2276258723 + "score":0.1107444823 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.4577745447 + "score":0.3916459404 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.3046437152 + "score":0.1649626358 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.3825631739 + "score":0.2656552119 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.1034813211 + "score":0.0539171508 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.2900727397 + "score":0.2436825008 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0357350273 + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.2486408005 + "score":0.1581276083 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.3838720489 + "score":0.1658903033 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.5776118345 + "score":0.4217933103 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.200683389 + "score":0.0803859812 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.4892134584 + "score":0.3220461814 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mai", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"mai", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.2846718719 + "score":0.1018185799 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.5239506053 + "score":0.3881585962 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.135802286 + "score":0.0840554004 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.3994207414 + "score":0.3260852936 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.1470288737 + "score":0.036255172 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.370597998 + "score":0.2361171448 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.0624257747 + "score":0.0369324798 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.3667603362 + "score":0.3426601677 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ml", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ml", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.271926141 + "score":0.1675392326 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.5318057054 + "score":0.4161590898 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.2579782083 + "score":0.101823454 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.4806502272 + "score":0.3124446375 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2987546297 + "score":0.1012625471 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.526361525 + "score":0.3580813711 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.1515083487 + "score":0.0891010327 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4332812085 + "score":0.3158325956 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.3458633411 + "score":0.1211248924 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.5703964991 + "score":0.389246098 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.4050410338 + "score":0.2367123999 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.6535211779 + "score":0.5273473365 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.2598947984 + "score":0.1262296798 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.4881220333 + "score":0.3480250641 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.2015903544 + "score":0.104091386 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.4513077936 + "score":0.3135377948 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.260901002 + "score":0.1488007297 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.4896606547 + "score":0.4132412315 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.1429173328 + "score":0.0478735067 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.4071639857 + "score":0.3069618299 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.2472539314 + "score":0.1523025562 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.4972878378 + "score":0.4096891017 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.2865823477 + "score":0.2469695748 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.5850706516 + "score":0.524876157 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0888165228 + "score":0.0369494139 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.2927704081 + "score":0.2350129808 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0527161443 + "score":0.0015518794 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.3183888298 + "score":0.1140211549 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.024472012 + "score":0.0103766134 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.2143061298 + "score":0.1821363344 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0100859589 + "score":0.0003856632 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.2528047704 + "score":0.0985339751 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.285587224 + "score":0.1279029727 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.5216659729 + "score":0.3504496172 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.1281773816 + "score":0.1158076498 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.3720376754 + "score":0.3583374616 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4479091606 + "score":0.3550414512 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6272058507 + "score":0.5626107823 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4155919737 + "score":0.2784963846 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5685427433 + "score":0.4121299981 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.2568309796 + "score":0.1049411882 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.5058927884 + "score":0.371724232 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.3323405641 + "score":0.2126550777 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.5569354008 + "score":0.4754992095 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.2 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3183331223 + "score":0.1813353123 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5559144449 + "score":0.4632560004 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4500659682 + "score":0.3450201321 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6761551234 + "score":0.5827805827 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.2803966495 + "score":0.1323104842 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.5308215606 + "score":0.3747307468 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.5346576918 + "score":0.2733723845 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.7024180686 + "score":0.5057937589 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2051262499 + "score":0.164734586 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4837372958 + "score":0.4400610126 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.3699194641 + "score":0.2884407046 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5770913921 + "score":0.5338739518 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.1806014296 + "score":0.0202859007 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.4291519278 + "score":0.2084128437 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.0966721561 + "score":0.0129709626 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.3133383199 + "score":0.1407028363 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.2666171334 + "score":0.0476641683 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.4765001737 + "score":0.1691869095 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.1168753501 + "score":0.0111247819 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.3522716786 + "score":0.151377306 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.2270309753 + "score":0.0307653909 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.4841536531 + "score":0.2382457281 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.1352090178 + "score":0.0240096696 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.3326584955 + "score":0.154444722 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.067782587 + "score":0.0116292791 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.2594122638 + "score":0.1897831748 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.0496762437 + "score":0.000876482 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.2555516699 + "score":0.066397943 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.1103212906 + "score":0.0221364496 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.3317936338 + "score":0.2036637198 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.1178727843 + "score":0.0300378344 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.3706477532 + "score":0.146034089 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.2704960778 + "score":0.1551096033 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.5452851397 + "score":0.4297549368 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.395555296 + "score":0.200397515 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.5799018584 + "score":0.4351193348 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.253350788 + "score":0.0631852964 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.458172945 + "score":0.3127999721 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.1663559354 + "score":0.0328870671 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.4946292339 + "score":0.285042966 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.3079953173 + "score":0.1749499193 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5447429639 + "score":0.4691275614 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.3935950974 + "score":0.2454574882 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6342780862 + "score":0.5348019826 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.2983575506 + "score":0.1325294802 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.5283269577 + "score":0.4051925402 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.3087303367 + "score":0.1631216823 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.6028401745 + "score":0.4696161488 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.2210855899 + "score":0.1338729952 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.4616487061 + "score":0.3640492116 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.2867265678 + "score":0.1115670494 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.5646798034 + "score":0.4303510763 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"te", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"te", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.357367421 + "score":0.2374723306 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5659157865 + "score":0.4675485501 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.3515649071 + "score":0.2528444882 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5645706042 + "score":0.4794045124 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"tg", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.1896909211 + "score":0.00699528 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.4144350541 + "score":0.2382738034 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.202234159 + "score":0.0018437478 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.4284203038 + "score":0.2225002567 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.2422208114 + "score":0.1140490825 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.4745802383 + "score":0.3668015685 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.3118363896 + "score":0.1317498141 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.4892437324 + "score":0.2770372268 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.0689409767 + "score":0.0342955291 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.2722531305 + "score":0.2066101372 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.0406781073 + "score":0.0101983319 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.0970886698 + "score":0.0539238863 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.2772171859 + "score":0.1209396556 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.5118206984 + "score":0.3409074931 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.3271066365 + "score":0.270580586 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.5803660329 + "score":0.537606561 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2451818788 + "score":0.1285741979 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.4929183421 + "score":0.4057727321 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.2948409512 + "score":0.1865914948 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.5143250682 + "score":0.4565231191 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0677223644 + "score":0.0177160721 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.1661136189 + "score":0.1342994379 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0419433658 + "score":0.0010895392 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.1567633534 + "score":0.0283042279 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.2584265792 + "score":0.1155003818 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5020697076 + "score":0.3250077925 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2624388601 + "score":0.1195513435 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4452300688 + "score":0.3158904676 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.2640324297 + "score":0.0676473408 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.5196545965 + "score":0.2672641675 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.2748329219 + "score":0.0166808106 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.5341998684 + "score":0.2361978954 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2872551102 + "score":0.1452425625 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5360250569 + "score":0.3941195385 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.3575088107 + "score":0.2680553268 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.5879974234 + "score":0.5055559664 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0892196115 + "score":0.0218129891 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.266303191 + "score":0.2316696377 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0392376693 + "score":0.0005135911 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.1442620012 + "score":0.0644762753 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.2174626032 + "score":0.0721904827 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.4686565248 + "score":0.3322122834 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.0941401506 + "score":0.0963078281 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1690248565 + "score":0.1506583582 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.0921572696 + "score":0.0658670408 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.3072945662 + "score":0.2264957148 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.052810575 + "score":0.0240888197 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.2387421258 + "score":0.1589846026 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0706373037 + "score":0.0361610953 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.2741240176 + "score":0.2209617429 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0463945559 + "score":0.0152526027 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.1664955069 + "score":0.1066841292 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.2098595469 + "score":0.1360263411 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.4603190119 + "score":0.4135302369 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.0831701767 + "score":0.1427052583 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2065328404 + "score":0.2190638456 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2619931658 + "score":0.1516445239 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5251502482 + "score":0.4517979691 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2725475868 + "score":0.2058198052 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3148065512 + "score":0.2799124898 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.1611133093 + "score":0.0158837296 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.379825756 + "score":0.2111229219 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.0702261565 + "score":0.0440027048 }, { - "model":"meta-llama\/llama-3.1-70b-instruct", + "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.3706020457 + "score":0.1856579938 + }, + { + "model":"microsoft\/phi-4", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.1723520986 + "score":0.1381244544 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.4083355109 + "score":0.3106084366 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.1631097827 + "score":0.1319133586 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.3376536763 + "score":0.2261398681 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.0539023061 + "score":0.0101349522 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.1825644081 + "score":0.0727946226 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0390551527 + "score":0.0087636854 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.1712135521 + "score":0.0326918009 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"am", + "task":"arc", + "metric":"accuracy", + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.0584274365 + "score":0.0143438883 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.2032749366 + "score":0.0651139855 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.0680986525 + "score":0.0145699741 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.1147231502 + "score":0.1077126314 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.3942588381 + "score":0.3303312588 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.1208986545 + "score":0.0568249639 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.329533412 + "score":0.1985159581 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2166642565 + "score":0.1428907436 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.4531623464 + "score":0.3107041775 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.2433592841 + "score":0.1849770017 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.3890038008 + "score":0.2932088535 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.0907074882 + "score":0.0246413933 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.3354239861 + "score":0.2220905764 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.0993142086 + "score":0.0462912201 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.3089493054 + "score":0.2082310898 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.1494352369 + "score":0.021812522 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.3928912763 + "score":0.1700035697 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.212669524 + "score":0.0687018163 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.4040671507 + "score":0.213092048 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.1385938854 + "score":0.0516867052 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.3301851724 + "score":0.2165108464 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.0056778273 + "score":0.0097728449 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.2000094468 + "score":0.1483692036 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.1920493262 + "score":0.08262787 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.3987398592 + "score":0.281005553 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.1143530268 + "score":0.0481979333 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.3000528345 + "score":0.2232523474 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.090597898 + "score":0.0805820584 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.3141862136 + "score":0.2555758551 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.0583354997 + "score":0.0296286693 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.2066334095 + "score":0.1319853113 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.0984443913 + "score":0.0173366455 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.385398237 + "score":0.220616462 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.0906994852 + "score":0.0065148659 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.3103360752 + "score":0.1526491803 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.1762047672 + "score":0.0459721625 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.4472079773 + "score":0.2229551601 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.1314817408 + "score":0.0233985631 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.3114200678 + "score":0.1665184954 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.1749870589 + "score":0.0355167863 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.3959743603 + "score":0.2600874171 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.1992458221 + "score":0.0323184525 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.3995820747 + "score":0.1970289791 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.1578229782 + "score":0.0892751266 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.3697628875 + "score":0.2388608153 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.0776521407 + "score":0.0416926889 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.2805707324 + "score":0.1552666429 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.0726133349 + "score":0.020551822 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.2357676924 + "score":0.1292684598 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.0025842148 + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.2080643735 + "score":0.0744822177 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.2220546496 + "score":0.0527597248 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.4769435204 + "score":0.3026154166 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.2026562463 + "score":0.0777044688 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.3851807662 + "score":0.3081482084 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.2301502689 + "score":0.1237340737 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.4644650793 + "score":0.3601104142 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.2921772734 + "score":0.1371705946 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.4951314188 + "score":0.4120757797 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.2059164492 + "score":0.0837672025 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.4480550849 + "score":0.2648038016 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.0964637139 + "score":0.0885028071 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.2014449762 + "score":0.2007646735 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"en", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.4124666467 + "score":0.3548422361 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.582315662 + "score":0.460765953 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.5519762012 + "score":0.4690424472 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7446058172 + "score":0.6788013861 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"es", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.2337331881 + "score":0.1169662945 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.4523774022 + "score":0.3242693179 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.2045157252 + "score":0.2556403143 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.4389018917 + "score":0.4583071754 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.1671223511 + "score":0.0271486292 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.4271635079 + "score":0.2182731449 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.0972518606 + "score":0.0526574176 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.3009816209 + "score":0.2424108963 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fil", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.2033697572 + "score":0.0762125847 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.4085758597 + "score":0.2228549327 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.1337024916 + "score":0.0269063649 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.3569658269 + "score":0.2401408344 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2381890937 + "score":0.0839707225 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.4806457757 + "score":0.3074010094 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.3213357404 + "score":0.2478840637 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.5096591733 + "score":0.4205657928 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0354285129 + "score":0.0290727628 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.1982926593 + "score":0.1841843114 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0009992134 + "score":0.0523495621 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.0462874193 + "score":0.1231670583 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.168790959 + "score":0.0005257422 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.3588726594 + "score":0.0721265952 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.0998466384 + "score":0.000262224 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.3089879911 + "score":0.0118348356 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ha", + "task":"arc", + "metric":"accuracy", + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ha", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.0730728705 + "score":0.0318398305 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.2880810134 + "score":0.1834830244 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.032930228 + "score":0.0320718253 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.1836595807 + "score":0.1032515167 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"hi", + "task":"arc", + "metric":"accuracy", + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.2951777856 + "score":0.1989310744 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5193991656 + "score":0.408792844 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.332068694 + "score":0.2107266229 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5232846835 + "score":0.3722535388 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hne", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.0924512162 + "score":0.0223204074 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.2983835975 + "score":0.1959765545 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.081522251 + "score":0.022115131 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.3091578658 + "score":0.1882969266 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.1578469391 + "score":0.1149995432 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.3871847635 + "score":0.3052316233 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.0748322147 + "score":0.1017437337 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.2638608298 + "score":0.2651695911 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.1717243204 + "score":0.1251179936 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.4178336488 + "score":0.3078536626 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.1865357446 + "score":0.1049757961 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.429444403 + "score":0.338086632 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ig", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.0478268633 + "score":0.0185191424 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.2570257732 + "score":0.1790132896 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.0018410191 + "score":0.0151653031 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.1558631502 + "score":0.0537338226 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.0364948502 + "score":0.0341024751 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.2541169376 + "score":0.2126115238 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.0232335134 + "score":0.0221151729 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.2533249921 + "score":0.1431429685 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"it", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.1826323546 + "score":0.1041933329 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.4460216478 + "score":0.3064701129 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.1650968642 + "score":0.1299185029 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.3244010767 + "score":0.3570513672 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.1933631076 + "score":0.1170990874 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4686346426 + "score":0.3281623219 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.1629637627 + "score":0.1166577127 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.265082494 + "score":0.2303280443 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.0726508623 + "score":0.0192945074 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.3051851212 + "score":0.2015068169 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.1282743245 + "score":0.045857499 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.3321469572 + "score":0.1778848232 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.1233188769 + "score":0.0278653757 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.3275167565 + "score":0.2309769046 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.0449879835 + "score":0.0264488684 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.2826754009 + "score":0.1618433519 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.1013719184 + "score":0.0187368299 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.2292361214 + "score":0.210610547 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.0071013458 + "score":0.0070803381 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.1783377636 + "score":0.0602951272 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.6 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.1401399582 + "score":0.0343738545 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.3051974688 + "score":0.1971697601 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.0942053588 + "score":0.012430185 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.3363687748 + "score":0.0969965616 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ko", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.1324044329 + "score":0.1011791445 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.3790251178 + "score":0.2665626277 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.1547958954 + "score":0.0674482283 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.2281907624 + "score":0.1439352867 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.04633447 + "score":0.0333812973 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.2489624204 + "score":0.2140071833 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0124095873 + "score":0.013803565 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.0933720751 + "score":0.0856760144 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.2273546789 + "score":0.0857349903 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.4493238694 + "score":0.2682295704 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.1724674084 + "score":0.0363984536 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.3974478706 + "score":0.2133514375 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.1570380948 + "score":0.0831966089 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.4536227649 + "score":0.3252283455 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.0768901169 + "score":0.0100264548 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.2918693283 + "score":0.2049643183 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mg", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.0180474118 + "score":0.0402775114 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.2099411444 + "score":0.2132968488 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.0414049927 + "score":0.0178624704 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.2278780188 + "score":0.1584836987 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.171951865 + "score":0.044306682 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.3835379408 + "score":0.2110608123 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.1659756056 + "score":0.0101250707 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.4178488097 + "score":0.1446641679 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.144541816 + "score":0.0250471784 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.3648764501 + "score":0.2097577846 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.1047482644 + "score":0.034382114 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.2965277611 + "score":0.2211758055 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.2497767256 + "score":0.120023798 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.4824221555 + "score":0.3039131897 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.1854347311 + "score":0.1137229069 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.331339842 + "score":0.3446031673 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.0578909241 + "score":0.1101780964 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.2614924673 + "score":0.2424045636 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.1528509146 + "score":0.0971253665 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.347684852 + "score":0.1223804901 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ne", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.1703660079 + "score":0.0522706053 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.4009054828 + "score":0.2509451803 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.0612834989 + "score":0.0404811569 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.3159716539 + "score":0.2098515398 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"nl", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.1766082572 + "score":0.1127735687 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.4438802086 + "score":0.3096427976 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.2113751191 + "score":0.1171995651 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.3971789312 + "score":0.39693057 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ny", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0288119948 + "score":0.0249203424 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.1974757162 + "score":0.183758763 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0004462318 + "score":0.0148302605 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.1835550789 + "score":0.1329930306 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0322766368 + "score":0.0171568718 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.1548467965 + "score":0.1561109456 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0104672499 + "score":0.0097264241 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.1495897529 + "score":0.0452833915 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.1266427785 + "score":0.0554840251 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.3740164648 + "score":0.1538079363 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.0281188176 + "score":0.0033288372 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.1879339756 + "score":0.036508675 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3595710948 + "score":0.3055395757 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.539538196 + "score":0.4480585816 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.3110476538 + "score":0.2165906221 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.4964416902 + "score":0.3271537328 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pl", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.1791082561 + "score":0.1017362354 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.4315704667 + "score":0.2782010079 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.1725231395 + "score":0.1041302213 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.3476924531 + "score":0.2865629267 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.2073899393 + "score":0.1031395116 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.4691838054 + "score":0.3223915745 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.1956364634 + "score":0.1194174782 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.3100521945 + "score":0.3618255907 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.1 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ro", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.2131558334 + "score":0.0901332073 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.4133959067 + "score":0.2638668804 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.1729368847 + "score":0.1408494847 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.3072044813 + "score":0.2546101322 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.1671614266 + "score":0.0826481083 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4422244829 + "score":0.2913230821 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.1893614933 + "score":0.1808682916 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.4605763082 + "score":0.3815777762 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.0204702219 + "score":0.0012309971 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.2077171561 + "score":0.0385987025 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.0015730125 + "score":0.0093358773 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.1513689177 + "score":0.0457261214 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.1191725522 + "score":0.0448599501 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.3048062246 + "score":0.1691371082 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.0235301378 + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.1674956015 + "score":0.0527194634 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"si", - "task":"classification", + "task":"arc", "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.079322028 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.330454367 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.0184455254 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.1256002879 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sn", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.038846423 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.2024296237 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0139189497 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.1438358845 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.027300431 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.1864710841 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0183915432 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.1602963374 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sr", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"si", + "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.139057659 + "score":0.0187725283 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.366792492 + "score":0.1451005114 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.237878029 + "score":0.0072043177 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.4452128635 + "score":0.0622904587 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"su", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sn", + "task":"arc", "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.1119370423 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.3476430008 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.016563929 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.225837323 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sv", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sv", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2563194972 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.498105827 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.2800341704 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.4239442229 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sw", - "task":"mgsm", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sn", + "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sw", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.153544279 + "score":0.015192186 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sw", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.381670314 + "score":0.1562018554 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sw", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.0470329064 + "score":0.0146518601 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"sw", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.3516233998 + "score":0.0664358997 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ta", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"so", + "task":"arc", "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.116565827 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.3259696821 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.1799251119 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.4422961909 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"te", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"te", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.2177874056 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.4228885108 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.2215396728 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.4514044695 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"tg", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"so", + "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"tg", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.0505657194 + "score":0.0168598973 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"tg", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.297617816 + "score":0.1350682776 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"tg", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.0562697145 + "score":0.0192034206 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"tg", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.2287718392 + "score":0.1607323446 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"th", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sr", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.1077250531 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.3466580166 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.2151882368 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.3617567446 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ti", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sr", "task":"classification", "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0275426369 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.1625316134 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0540664405 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"tr", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sr", + "task":"mgsm", "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.1246699911 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.3740469363 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.1584629952 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.3608300472 + "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"uk", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sr", + "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"uk", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.1934348695 + "score":0.0538059584 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"uk", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.4161013327 + "score":0.2453781212 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"uk", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.2103705273 + "score":0.0917605905 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"uk", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.4294671925 + "score":0.2668905804 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"umb", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"su", + "task":"arc", "metric":"accuracy", "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0244434593 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1675541923 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0007178002 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.1007849921 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ur", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.142024307 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.3925303581 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.13811776 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.3521086255 + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"uz", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"su", + "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"uz", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.1642228534 + "score":0.0217870696 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"uz", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.3736012886 + "score":0.2165031068 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"uz", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.055748564 + "score":0.0464674805 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"uz", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.2468016347 + "score":0.2235940604 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"vi", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sv", + "task":"arc", "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.1850447544 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.4328029287 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.2262846976 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.3523271738 + "score":1.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"wo", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sv", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"wo", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0509608434 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.225219126 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0166144137 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.160771848 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"wuu", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sv", + "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"wuu", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.1255642972 + "score":0.1007032416 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"wuu", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.3734878168 + "score":0.2719560518 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"wuu", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.0067248884 + "score":0.1266242057 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"wuu", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.0887426693 + "score":0.3334538145 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"xh", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", + "task":"arc", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"xh", - "task":"mgsm", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", + "task":"classification", "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0447439631 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.2216512685 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0026591548 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.1403714517 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"yo", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", + "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"yo", - "task":"mgsm", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", + "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"yo", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.0254727037 + "score":0.0422003709 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"yo", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.2062253383 + "score":0.253591842 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"yo", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.0094132633 + "score":0.0280729387 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"yo", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.0989408911 + "score":0.1884927612 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"yue", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"sw", + "task":"truthfulqa", "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1143976046 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.3625809543 - }, - { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1277032117 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2179430009 + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.6 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zh", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zh", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zh", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.2115579648 + "score":0.0366379898 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zh", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.4634573062 + "score":0.1847934746 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zh", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.1903215556 + "score":0.0141355453 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zh", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.2429625775 + "score":0.1724636201 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zu", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zu", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zu", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.0151163425 + "score":0.1550101498 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zu", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.1971831566 + "score":0.3623113506 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zu", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.023417855 + "score":0.1217984824 }, { - "model":"meta-llama\/llama-3.1-8b-instruct", - "bcp_47":"zu", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.1827820551 + "score":0.2801870917 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ar", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ar", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.0534412445 + "score":0.0606788965 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ar", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.1423750168 + "score":0.1688995018 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ar", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.0522677244 + "score":0.0007119113 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ar", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.1317238701 + "score":0.0386741345 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"bn", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"th", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"bn", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"bn", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.0853746951 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"bn", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.0061046998 + "score":0.3009803927 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"bn", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.048395385 + "score":0.0676677726 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"bn", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.1624192131 + "score":0.2051763344 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"de", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.1 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"de", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"de", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.0365328036 + "score":0.0118399471 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"de", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.1594198405 + "score":0.0818929883 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"de", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.089863703 + "score":0.0 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"de", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.2290725718 + "score":0.0131103824 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"en", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tr", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"en", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"en", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tr", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.1186358374 + "score":0.1075252941 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"en", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.2861009841 + "score":0.2810155518 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"en", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.0659782167 + "score":0.0683323294 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"en", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.1219080159 + "score":0.2657241512 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"es", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uk", "task":"classification", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"es", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"es", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uk", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.0197183104 + "score":0.0921333598 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"es", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.1858500787 + "score":0.2640680177 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"es", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.1372446539 + "score":0.1042804602 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"es", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.1970334371 + "score":0.257176459 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"fr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"umb", "task":"classification", "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"fr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0225723033 + "score":0.0453126073 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"fr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.2128372612 + "score":0.1567880475 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"fr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0900337367 + "score":0.027790575 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"fr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.2030113676 + "score":0.0855724163 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"hi", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", + "task":"arc", "metric":"accuracy", "score":0.5 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.1275394125 - }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.2249822055 - }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.1164104087 + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", + "task":"classification", + "metric":"accuracy", + "score":0.7 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.2255759794 + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"id", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", + "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"id", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.0396919548 + "score":0.096255918 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"id", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.193498077 + "score":0.2490196736 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"id", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.0463130015 + "score":0.0759551519 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"id", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.147263979 + "score":0.2765897266 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ja", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ja", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ja", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.0196332917 + "score":0.030810794 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ja", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.0615297483 + "score":0.1804383237 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ja", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.002077843 + "score":0.0151653031 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ja", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.0228028425 + "score":0.0766086067 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"jv", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", + "task":"arc", "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.0192847454 - }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.1432000045 + "score":0.6 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.0179600462 + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", + "task":"classification", + "metric":"accuracy", + "score":0.9 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.1308378956 + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"mr", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", + "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"mr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.0103335448 + "score":0.1578714698 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"mr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.0394434629 + "score":0.3784433754 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"mr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.0093410183 + "score":0.1713340477 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"mr", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.0758287765 + "score":0.3260532752 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"pa", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"wo", "task":"classification", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"pa", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"wo", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.2072883352 + "score":0.0354904515 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"pa", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.2238489904 + "score":0.1880266806 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"pa", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.2192906863 + "score":0.0178986288 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"pa", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.330676254 + "score":0.0724032398 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"pt", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":0.9 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"pt", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.0506856305 + "score":0.0482308543 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"pt", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.2502940387 + "score":0.2087387992 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"pt", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.0801303171 + "score":0.0574307954 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"pt", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1635269784 + "score":0.1098000711 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ru", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ru", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ru", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.050859162 + "score":0.0118398272 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ru", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.2002283949 + "score":0.1505828307 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ru", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.0199756762 + "score":0.008719744 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ru", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.1055944134 + "score":0.056668863 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"sw", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yo", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"sw", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"sw", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yo", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.001996605 + "score":0.0200324188 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"sw", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.1999016804 + "score":0.1553578618 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"sw", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0453589274 + "score":0.0095466427 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"sw", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.1405820992 + "score":0.0491504248 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"te", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yue", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"te", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"te", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.1115247653 + "score":0.0513648793 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"te", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.1296059134 + "score":0.2577830867 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"te", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1480453761 + "score":0.119690435 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"te", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2690624141 + "score":0.1721639976 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ur", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", + "task":"arc", "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.0114376329 - }, - { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.0233475148 + "score":1.0 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.0385720696 + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", + "task":"classification", + "metric":"accuracy", + "score":0.9 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.1423687352 + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"vi", - "task":"classification", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", + "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"vi", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.0697220351 + "score":0.0821079546 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"vi", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.1874677848 + "score":0.3164863838 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"vi", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.135086862 + "score":0.1265931852 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"vi", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.2446182636 + "score":0.1793067232 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"zh", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zu", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"zh", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"zh", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.0190942897 + "score":0.0115347204 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"zh", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.1108498114 + "score":0.1485833844 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"zh", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0093856962 }, { - "model":"meta-llama\/llama-3.2-1b-instruct", - "bcp_47":"zh", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.0294450289 + "score":0.0608458885 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"microsoft\/phi-4-multimodal-instruct", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.2279903683 + "score":0.1520421573 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.4722573355 + "score":0.3707336059 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.1435174722 + "score":0.0325845731 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.3991235315 + "score":0.1866364833 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.1058038471 + "score":0.049235994 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.3048469769 + "score":0.1875704973 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0491812173 + "score":0.0203716729 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.2808449794 + "score":0.1964052359 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.1452228976 + "score":0.0457848104 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.3699202818 + "score":0.1635760551 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0630164833 + "score":0.0043114209 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.1755172285 + "score":0.0733956093 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.2933386948 + "score":0.0844832543 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.5447211689 + "score":0.2899357726 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.2166524228 + "score":0.0202296618 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.4496497227 + "score":0.1170466993 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2997360932 + "score":0.1314009634 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.552448295 + "score":0.3827163755 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.2837522278 + "score":0.0953897712 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5150814494 + "score":0.2894343613 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.1559168311 + "score":0.0542604747 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.4268663808 + "score":0.2839533373 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1391913129 + "score":0.0103488851 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.3773300026 + "score":0.1022931459 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.1936957127 + "score":0.1005104859 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.4274660929 + "score":0.3241333261 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.2338775014 + "score":0.0397410561 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.4546098648 + "score":0.1986373033 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.4 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.2240336457 + "score":0.0370753847 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.456891102 + "score":0.2466649661 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.0249457171 + "score":0.0104943059 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.237299794 + "score":0.1703228075 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.3606359676 + "score":0.1181763987 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.5477908661 + "score":0.3010924314 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.1847580734 + "score":0.0774366468 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.3911269476 + "score":0.232244564 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.1576463626 + "score":0.0700327695 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.3910005157 + "score":0.2793919522 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.1690495289 + "score":0.04780178 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.3930640761 + "score":0.1986318307 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"be", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.1487255467 + "score":0.0761832692 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.4407404732 + "score":0.3293090829 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.2175383868 + "score":0.0940299872 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.4344749015 + "score":0.3022965125 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.3061950313 + "score":0.1421502617 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.5535821276 + "score":0.3249063292 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.1811004213 + "score":0.044984749 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.3534531968 + "score":0.1531327249 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2646772038 + "score":0.1016298945 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5157826791 + "score":0.2980803254 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.331255344 + "score":0.0978160022 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5093924414 + "score":0.2190252958 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.3806413844 + "score":0.0784827192 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.5722776653 + "score":0.3301673127 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.2863154138 + "score":0.0942154389 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.5492472281 + "score":0.3318581823 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.5 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.181535472 + "score":0.0227564483 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.4498383877 + "score":0.1618637003 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.0712120544 + "score":0.0041151275 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.3404533027 + "score":0.052321141 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.2928040954 + "score":0.1376994092 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.5416342014 + "score":0.3446006208 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.3623393932 + "score":0.1126650404 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.5863625454 + "score":0.3592815418 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3129672706 + "score":0.1786073211 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5456543979 + "score":0.418923403 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4852409005 + "score":0.1529904036 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6908376394 + "score":0.4257110482 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.2581336709 + "score":0.0881428767 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.4704308834 + "score":0.2731602409 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.3342775397 + "score":0.1667346071 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.5026657233 + "score":0.3197259125 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.5510215557 + "score":0.3166864072 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6909834226 + "score":0.5483508218 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.6679215449 + "score":0.4404172544 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.823262947 + "score":0.7231001513 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3139331841 + "score":0.1298121807 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5454623234 + "score":0.3403579227 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3561290923 + "score":0.3743863952 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.599796306 + "score":0.5971283997 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fa", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"fa", + "task":"classification", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.3069040556 + "score":0.1016894588 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.5327832177 + "score":0.3636401028 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.2185015953 + "score":0.1695199459 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.4264089038 + "score":0.3986853323 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.3745780882 + "score":0.1164300835 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.5785175063 + "score":0.3215620941 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.2904415478 + "score":0.1452350029 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.5908280404 + "score":0.4128118494 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2963449909 + "score":0.1078563354 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5544997379 + "score":0.3207926618 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5091700689 + "score":0.3659011486 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.690320784 + "score":0.580998869 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0862885919 + "score":0.0208055886 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.2372420697 + "score":0.1775856129 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0261732885 + "score":0.0303702553 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.168027641 + "score":0.1647346597 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.2 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.295613677 + "score":0.0897802232 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.5221513183 + "score":0.3334021167 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.1697878702 + "score":0.0546926081 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4619303787 + "score":0.2631919591 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.5 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.1704488365 + "score":0.038636598 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.4403441536 + "score":0.1770095402 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.1466885285 + "score":0.0050909961 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.4179596519 + "score":0.1811657432 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.385731086 + "score":0.1271878224 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6034244629 + "score":0.3698932868 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.357812342 + "score":0.1324625901 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5894721809 + "score":0.3233634009 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.2543010782 + "score":0.0809118708 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.4974062413 + "score":0.2537010038 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.1038893205 + "score":0.022242601 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.3250555425 + "score":0.2086622767 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.276319154 + "score":0.1216394809 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.5531277158 + "score":0.4062520998 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.3130631115 + "score":0.1488006127 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.576837791 + "score":0.3814897068 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.3143630083 + "score":0.1489627056 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5621460006 + "score":0.4172638299 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3372718385 + "score":0.2008824981 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6543894215 + "score":0.5185852751 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.1641186812 + "score":0.0505138835 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.4109013799 + "score":0.1914888261 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.1375093856 + "score":0.0070198993 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.3810358014 + "score":0.1690394526 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.2236376263 + "score":0.0982399037 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.4348813399 + "score":0.2845447958 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.1135128656 + "score":0.0238061486 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.4345857133 + "score":0.2070418144 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.2934122255 + "score":0.1148524922 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.5393871714 + "score":0.3722842281 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.3100077394 + "score":0.250138544 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5714730187 + "score":0.5338430631 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.284322765 + "score":0.1169165949 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5304019177 + "score":0.3638899173 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.2212745751 + "score":0.1330552123 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.355950114 + "score":0.2685952079 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.2779520489 + "score":0.0823011221 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.5022141687 + "score":0.2825939861 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2476437073 + "score":0.0327082346 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.4805413308 + "score":0.2319054893 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"kk", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"kk", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.1839138217 + "score":0.044667859 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.4860524069 + "score":0.2691000298 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.170404008 + "score":0.0732644907 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.4549282359 + "score":0.3296633392 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.2759135194 + "score":0.0353609299 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.5427685716 + "score":0.1909025949 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.1237988917 + "score":0.0077167113 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.3265823778 + "score":0.1386174808 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.2379701997 + "score":0.1165534681 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.4807978998 + "score":0.3877914341 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.2068140088 + "score":0.1033665849 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.4615143451 + "score":0.3638806009 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.2508944927 + "score":0.1314926141 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.4998320266 + "score":0.3540405018 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.210142355 + "score":0.1705869429 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.3127505848 + "score":0.2612780395 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.0890010757 + "score":0.0158033007 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.2623679578 + "score":0.1802186885 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0406768013 + "score":0.004737288 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.2265562343 + "score":0.1665989397 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.3934799806 + "score":0.0968803629 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.6000359011 + "score":0.3271235347 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.2497480714 + "score":0.0742021289 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.5149579975 + "score":0.1811100359 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.2776870629 + "score":0.0830883828 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.5361437897 + "score":0.2590209016 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.101231398 + "score":0.0473984845 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.3867790942 + "score":0.2714023791 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.3 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.1710070826 + "score":0.0436065244 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.3961869922 + "score":0.1616079019 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.0586978059 + "score":0.0321839146 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.3322172345 + "score":0.2872152251 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.2566290969 + "score":0.1039755938 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.5196341734 + "score":0.3670583743 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.2701878605 + "score":0.0490353313 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.4964908212 + "score":0.3016997477 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.3224419544 + "score":0.1016737952 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5606155155 + "score":0.3529445259 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.1663847917 + "score":0.0980137705 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4383662593 + "score":0.3076980329 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.3103894957 + "score":0.2051934522 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.5854645421 + "score":0.4174861616 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.3913336262 + "score":0.1030714956 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.6467989318 + "score":0.3069420156 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.2392486974 + "score":0.1062506996 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.4920626101 + "score":0.3311804385 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.2130836675 + "score":0.0661499319 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.4622075168 + "score":0.2408635082 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.2893481535 + "score":0.1305087747 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.5215715176 + "score":0.336550146 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.1419402772 + "score":0.0829900967 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.4014256358 + "score":0.3039678683 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.249611031 + "score":0.1166543201 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.4991029967 + "score":0.3031041679 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.2942599953 + "score":0.200304354 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.5860228525 + "score":0.4780661009 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0973366086 + "score":0.0265108253 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.3232614896 + "score":0.1166554461 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0552969578 + "score":0.0293332904 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.3314346183 + "score":0.1849240696 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.4 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.2 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0356994946 + "score":0.0177577979 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.2438060785 + "score":0.1411822431 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0183592041 + "score":0.0101970078 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.3076327609 + "score":0.1613941454 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.2 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.2403245803 + "score":0.0446786865 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.5023246313 + "score":0.1562050743 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.1415388613 + "score":0.0022538162 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.4055113288 + "score":0.0960982382 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4519562833 + "score":0.3158857772 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6328740374 + "score":0.5109022919 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.3918546765 + "score":0.2903521386 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.551810658 + "score":0.4758823803 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pl", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"pl", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.2474554181 + "score":0.1198559998 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.490135462 + "score":0.3492711529 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.3387973296 + "score":0.2480316528 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.5550868321 + "score":0.4488014348 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3065957195 + "score":0.1398701241 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5567333989 + "score":0.3034565852 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4450524918 + "score":0.3460979115 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6712742861 + "score":0.5835851988 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.2717880574 + "score":0.1130151873 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.5367497902 + "score":0.3818050844 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.4990357373 + "score":0.1838222494 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.6703309998 + "score":0.416344125 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2184907643 + "score":0.0947682488 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4824691404 + "score":0.3259777135 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.3406091079 + "score":0.1796274314 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5689518318 + "score":0.4360781177 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.1780482269 + "score":0.0300958323 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.4251975218 + "score":0.2021438397 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.1238628432 + "score":0.0097834933 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.3651594596 + "score":0.1744531846 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.3 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.2363136631 + "score":0.0299661217 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.4724628618 + "score":0.204544657 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.1012580658 + "score":0.0009238366 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.3226539734 + "score":0.0843772457 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.2 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.1702954814 + "score":0.031724087 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.4314499751 + "score":0.16451202 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.1165218233 + "score":0.0046124791 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.3219658957 + "score":0.1085913002 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.0415760658 + "score":0.017437495 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.2726763268 + "score":0.142035614 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.059393934 + "score":0.0146532378 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.3265574234 + "score":0.1923028552 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.0974935448 + "score":0.0516240546 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.337395124 + "score":0.153428686 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.0786475166 + "score":0.0121002424 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.3616383914 + "score":0.1697462625 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.2468393111 + "score":0.115711536 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.497224405 + "score":0.3597872407 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.4165200238 + "score":0.1694354423 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.60128551 + "score":0.4167060912 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.2331294709 + "score":0.0407632458 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.4605502791 + "score":0.2238900502 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.1664653883 + "score":0.0714251247 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.4762879225 + "score":0.2764333203 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.3181506443 + "score":0.2208693059 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5441377883 + "score":0.4679683611 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.384958542 + "score":0.3234795754 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6364660715 + "score":0.5608576982 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.2373393477 + "score":0.0669163701 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.5017230165 + "score":0.2784916366 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.2334583695 + "score":0.0633186191 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.5603415221 + "score":0.3074668268 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.2147204762 + "score":0.0917433239 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.4649686586 + "score":0.3851148557 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.2451735521 + "score":0.1299606269 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.5352886898 + "score":0.3428948363 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3767393472 + "score":0.1471870965 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5870286691 + "score":0.3392441061 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.3415510311 + "score":0.2060450795 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5592933672 + "score":0.3717773766 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"tg", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.1801710665 + "score":0.0594528699 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.4238537274 + "score":0.1988219607 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.1125461134 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.3456654305 + "score":0.1247215313 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.2385233061 + "score":0.1079848157 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.4987667959 + "score":0.2801778291 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.3202315883 + "score":0.1131365873 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.4748886274 + "score":0.2713637811 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.0653357736 + "score":0.0118794667 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.3152990905 + "score":0.1249552242 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.0468367135 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.1314286197 + "score":0.0507921341 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.4 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.2786347493 + "score":0.1097631082 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.5143010521 + "score":0.3352988316 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.3361615644 + "score":0.0795019275 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.5732166456 + "score":0.2884455353 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2384384134 + "score":0.0837893895 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.4989290832 + "score":0.2809963487 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.4068700974 + "score":0.2215012201 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.5784786574 + "score":0.4551853935 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"umb", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0295327628 + "score":0.0345153294 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.168725075 + "score":0.1364890072 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.047075079 + "score":0.0012941396 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.2446929278 + "score":0.1092334478 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.252958993 + "score":0.0522713846 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5053979802 + "score":0.3192866676 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2339821201 + "score":0.0868686952 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.425306622 + "score":0.2859772299 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.236632122 + "score":0.0689558305 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.4793696196 + "score":0.22853185 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.265421135 + "score":0.0415402981 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.5116987882 + "score":0.2227329297 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2721498467 + "score":0.0649160569 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5200799335 + "score":0.2830042558 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.3613736416 + "score":0.0350138164 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.5866656133 + "score":0.2206802597 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"wo", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0887261142 + "score":0.0588767323 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.2752257416 + "score":0.2123528181 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0469317169 + "score":0.0134096062 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.2008453897 + "score":0.1594472691 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.1844305556 + "score":0.0908263331 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.4417239043 + "score":0.3394219762 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1248600823 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1650750126 + "score":0.0645497034 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.0773908628 + "score":0.0478741208 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.2972337309 + "score":0.1576256072 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.0211167911 + "score":0.0235106256 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.2699477659 + "score":0.1852540612 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0578164805 + "score":0.0235724586 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.2633608218 + "score":0.1570768217 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0591720568 + "score":0.0152437624 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.1753356197 + "score":0.1214492647 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-nemo", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.2018552397 + "score":0.0740063452 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.4546838419 + "score":0.2757375638 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1477972133 + "score":0.0872564614 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.238559837 + "score":0.1427963743 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2259180607 + "score":0.0992800287 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5258178103 + "score":0.324429867 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2474954475 + "score":0.09185491 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.2841722148 + "score":0.1769207611 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.1511814979 + "score":0.0494841031 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.3667501588 + "score":0.1999996494 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.0635700737 + "score":0.0196415161 }, { - "model":"meta-llama\/llama-3.3-70b-instruct", + "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.3690469819 + "score":0.1910730769 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-nemo", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.3304277157 + "score":0.2867784698 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.5443905094 + "score":0.5037863792 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.1851657228 + "score":0.2421610142 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.4035662808 + "score":0.4148106883 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.109249521 + "score":0.0787707917 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.3018012797 + "score":0.218746848 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0601412463 + "score":0.0187531501 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.3378209538 + "score":0.1112723085 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.3 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.2 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.278050095 + "score":0.0462314764 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.5030892659 + "score":0.1434072436 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.2056413876 + "score":0.0132821079 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.319475702 + "score":0.0456848091 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.3412628716 + "score":0.2054466179 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.6168944847 + "score":0.4635698598 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.2079777284 + "score":0.2513408047 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.4737326062 + "score":0.5078766295 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.3288891576 + "score":0.278999196 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.560493221 + "score":0.5072892325 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.3370289388 + "score":0.335915232 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5616376735 + "score":0.5453940527 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.1877842343 + "score":0.1240264763 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.4430965745 + "score":0.3914345538 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1822974398 + "score":0.1767788852 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.3877585115 + "score":0.3838449247 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.2820784803 + "score":0.2249154291 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.5005600008 + "score":0.4283700551 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.1762767818 + "score":0.2483121176 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.3980584269 + "score":0.4611825726 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.2678950965 + "score":0.1368466985 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.5139106802 + "score":0.3988973343 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.1062771627 + "score":0.0337104112 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.3373123707 + "score":0.2239889311 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.3378377362 + "score":0.2836423323 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.5424277928 + "score":0.4827693819 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.2039436913 + "score":0.1511392088 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.413295677 + "score":0.3858872623 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.2078798411 + "score":0.1923090312 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.4443945632 + "score":0.3973361244 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.1864199422 + "score":0.1398741741 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.4220840798 + "score":0.4023712427 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.1482839317 + "score":0.1262645615 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.4568926673 + "score":0.389349888 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.2815774482 + "score":0.164496362 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.4716025494 + "score":0.42344822 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.2239129937 + "score":0.2246336129 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.4981762083 + "score":0.4406538597 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.1952639614 + "score":0.1192868334 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.40510597 + "score":0.3094007011 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.3130233588 + "score":0.2557484343 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5569163893 + "score":0.4816174974 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3322667951 + "score":0.336565743 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5140240989 + "score":0.500099888 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.3401088117 + "score":0.1502069597 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.5684561927 + "score":0.365503748 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.4507021781 + "score":0.1518861892 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.6542740054 + "score":0.3795025844 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.2563123252 + "score":0.0716255326 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.517826657 + "score":0.2354287318 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.2261228199 + "score":0.0016024995 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.4918700987 + "score":0.1289259809 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.3778406936 + "score":0.2895060168 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.6293186521 + "score":0.5088283599 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.4207498261 + "score":0.3502843148 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.6224036774 + "score":0.5851011111 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.386239845 + "score":0.33652498 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.6110300223 + "score":0.5394051209 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4508475568 + "score":0.4124704223 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6653203029 + "score":0.6500309258 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.3208174129 + "score":0.2440848305 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.5441131834 + "score":0.4482066389 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.3167289307 + "score":0.3683187834 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.5086510118 + "score":0.5300499022 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"arc", "metric":"accuracy", - "score":0.3 + "score":1.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.5649266234 + "score":0.4669071745 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.7240609445 + "score":0.6351301458 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.6160790992 + "score":0.5813419207 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.8133281991 + "score":0.8065247071 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3152773331 + "score":0.293714449 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5548531112 + "score":0.4892518335 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3335962816 + "score":0.3400529578 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.5859881472 + "score":0.6054518089 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.2974755741 + "score":0.2392739698 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.548510072 + "score":0.4813714407 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.198877694 + "score":0.2321648572 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.4016676481 + "score":0.4166342577 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.3394346908 + "score":0.2811212879 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.5772205685 + "score":0.4244631944 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.2961737536 + "score":0.2282664087 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.5732830973 + "score":0.5153269959 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.3174175523 + "score":0.2548863763 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5682518332 + "score":0.5181895957 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5173973527 + "score":0.4309072933 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.7056428374 + "score":0.6122951839 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0282301718 + "score":0.0365190298 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.2509358266 + "score":0.1781763265 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0302470726 + "score":0.0417146562 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.2143808411 + "score":0.0914072868 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.336179684 + "score":0.266641943 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.5448761462 + "score":0.4689143537 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.1255076156 + "score":0.1714078748 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4301109075 + "score":0.45528413 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.3 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.2176466652 + "score":0.0639113657 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.4472887488 + "score":0.1707631202 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.2048605344 + "score":0.0551968249 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.4547067722 + "score":0.165412979 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3796955055 + "score":0.3708164771 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6056742688 + "score":0.5780452995 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.382370623 + "score":0.3889665973 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5922578575 + "score":0.5940361548 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.2583771315 + "score":0.261843766 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.4968818998 + "score":0.4806474097 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.116404849 + "score":0.0995040783 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.355624191 + "score":0.3935925698 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.2893259192 + "score":0.218205371 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.5696888872 + "score":0.4561374245 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.3664678104 + "score":0.2523642916 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.6152145331 + "score":0.5178642158 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.2523550022 + "score":0.2714445111 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5458277736 + "score":0.5033343062 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3774331947 + "score":0.3367134056 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6486159416 + "score":0.6204206544 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.1 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.2004372781 + "score":0.0577998278 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.4254008414 + "score":0.184470268 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.2594342647 + "score":0.0317976664 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.4939201844 + "score":0.1311048104 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.2 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.1883645682 + "score":0.097793149 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.4379611856 + "score":0.2519860373 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.166061461 + "score":0.0683877466 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.4725232576 + "score":0.255829494 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.2923994901 + "score":0.26428669 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.5447705341 + "score":0.4902151754 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.4037821428 + "score":0.3239977856 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.6296260979 + "score":0.5865126635 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.3130983776 + "score":0.2208745982 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5536124921 + "score":0.4944838309 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.3266051607 + "score":0.1755723698 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4444671407 + "score":0.3733502483 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3146151088 + "score":0.1125731148 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.5434123174 + "score":0.2778916971 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.3068652176 + "score":0.0861201622 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5696595268 + "score":0.3310005151 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.2107380254 + "score":0.1995238484 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.4991705013 + "score":0.4335224538 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.331371608 + "score":0.172304501 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.5825812793 + "score":0.4246105774 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.1 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.3610313078 + "score":0.0572277693 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.605032383 + "score":0.2158207267 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.1827077293 + "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.4206815495 + "score":0.0812368695 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.273765965 + "score":0.2415284955 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.5541955864 + "score":0.4761318508 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.2689874625 + "score":0.2506029382 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.5150630417 + "score":0.4820610024 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.2599758467 + "score":0.2390442925 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.4915631618 + "score":0.4660486517 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.1778560135 + "score":0.1889249825 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.2969773205 + "score":0.314876296 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.0622795727 + "score":0.0673075407 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.2675764955 + "score":0.2078406147 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0381251381 + "score":0.0194519734 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.2812186233 + "score":0.1011298866 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.2963258613 + "score":0.2922121087 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.5478698134 + "score":0.5037938788 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.2490501232 + "score":0.2113094586 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.470734921 + "score":0.4865126178 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.2517024761 + "score":0.2362297066 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.5071223357 + "score":0.4746614882 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.1690507631 + "score":0.108285746 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.4610312304 + "score":0.3934483867 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.245769233 + "score":0.0484302224 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.4441647844 + "score":0.2222317379 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.1869691691 + "score":0.0220530515 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.4993277276 + "score":0.2506994166 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.3697837131 + "score":0.2936668736 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.598263628 + "score":0.4940538554 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.2866761532 + "score":0.1686173343 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.5500074549 + "score":0.4033123912 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.3125095049 + "score":0.3097940645 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5535296132 + "score":0.5304242832 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2275791183 + "score":0.2168719994 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4569915545 + "score":0.4555868419 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.345465339 + "score":0.3030951939 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.5897544047 + "score":0.5195567075 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.4313143535 + "score":0.3655435175 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.672362003 + "score":0.6292737269 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.3075903861 + "score":0.20731642 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.578349632 + "score":0.414222781 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.1897910105 + "score":0.0884087592 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.4717519215 + "score":0.2678188556 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.3557303786 + "score":0.2746115511 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.5879008408 + "score":0.5022730585 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.1851047496 + "score":0.1969099003 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.4783025401 + "score":0.4481037581 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.2465270522 + "score":0.23536401 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.5037852263 + "score":0.4943928771 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.3149743955 + "score":0.3235114454 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.5635296931 + "score":0.5875209718 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.2 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.1800815958 + "score":0.0495608632 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.4077560746 + "score":0.2045968087 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0667158921 + "score":0.0294620037 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.3678415876 + "score":0.1680044731 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":1.0 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0536067183 + "score":0.0200154664 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.2934406231 + "score":0.1606834413 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0212136796 + "score":0.0044245595 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.3367052821 + "score":0.1054952984 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.3 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.2884546263 + "score":0.1001204869 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.5474826718 + "score":0.2836152046 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.2053276951 + "score":0.0445164582 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.4215800492 + "score":0.2337334441 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4246522462 + "score":0.4077844252 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6364852561 + "score":0.6076754833 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.439726575 + "score":0.4459003493 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5806732389 + "score":0.5840266721 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.2708043929 + "score":0.2148436144 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.5320865131 + "score":0.4670207413 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.2827359953 + "score":0.2898684366 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.5407300006 + "score":0.525989117 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3451568022 + "score":0.264907032 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5845365036 + "score":0.4939362461 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4582156105 + "score":0.4563355662 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6491023878 + "score":0.6843169799 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.1 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.3130092522 + "score":0.258396409 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.5728467895 + "score":0.4912290692 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.503492173 + "score":0.4713411152 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.6731092747 + "score":0.6517904546 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2567420946 + "score":0.1955652432 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5061211552 + "score":0.4654058492 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.4025875747 + "score":0.2591393679 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.6083538055 + "score":0.4936043335 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.1596218382 + "score":0.0562734776 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.3642420922 + "score":0.212197658 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.1505395069 + "score":0.0314083234 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.4131000516 + "score":0.1492631083 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sd", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sd", - "task":"mgsm", + "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.3060556532 + "score":0.0890242869 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.5236224481 + "score":0.2507686532 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.2303677631 + "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.4550652237 + "score":0.0807676975 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.2505378464 + "score":0.0084519738 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.4777570712 + "score":0.151041875 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.238193304 + "score":0.0056901248 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.4255249112 + "score":0.100130068 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.1176743345 + "score":0.0267041676 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.3404474685 + "score":0.1555601794 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.1289978109 + "score":0.018455165 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.426186094 + "score":0.1280213362 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.2 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.2374825749 + "score":0.0558797598 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.4257984603 + "score":0.1672295272 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.1278040594 + "score":0.0266323434 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.4360467159 + "score":0.1344659816 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.2878836927 + "score":0.2165549669 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.5437304451 + "score":0.4902121608 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.4038152012 + "score":0.2493024035 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.5978824564 + "score":0.4391116426 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.2 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.2373274152 + "score":0.1532491466 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.4608187705 + "score":0.3330342559 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.1899355362 + "score":0.0762662838 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.4693159323 + "score":0.2740420072 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.298223826 + "score":0.2875970952 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5684457257 + "score":0.4962250868 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.3825937295 + "score":0.382073635 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6393626909 + "score":0.6293993104 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.3425500041 + "score":0.0845702794 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.5680420054 + "score":0.2799479817 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.2980763573 + "score":0.1327606257 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.5924685945 + "score":0.4346855791 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.2473591284 + "score":0.2777158956 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.5382867852 + "score":0.4958635491 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.3068229029 + "score":0.2970897235 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.5637367471 + "score":0.5394670378 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3462186566 + "score":0.3805998732 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.6055817314 + "score":0.5566308844 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.3822149946 + "score":0.3811390337 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5783618359 + "score":0.5895281984 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.2141866571 + "score":0.0685553777 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.4746491206 + "score":0.2845942287 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.2383903304 + "score":0.0371906835 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.4750617701 + "score":0.1797173863 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"th", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"th", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.2652851581 + "score":0.2079294904 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.5278626321 + "score":0.4320631023 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.3718263092 + "score":0.2456928253 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.5255136074 + "score":0.3989628007 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.1999962108 + "score":0.0154825384 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.4183108341 + "score":0.1339811483 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.1133461632 + "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.2113810541 + "score":0.0278445131 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.2978237586 + "score":0.2699521486 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.5152360665 + "score":0.478882362 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.3447394658 + "score":0.2654978305 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.5850690403 + "score":0.5487755246 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2589937034 + "score":0.2622533206 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.5348601679 + "score":0.4831695415 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.3544986277 + "score":0.3002613398 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.5913345073 + "score":0.4834403722 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"umb", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0650028377 + "score":0.0279025481 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.1912574022 + "score":0.0843438607 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0525305732 + "score":0.0224775292 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.2699878572 + "score":0.1057554869 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.253549974 + "score":0.2469989894 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5235064606 + "score":0.4943114536 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2484556869 + "score":0.2844546137 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.429442787 + "score":0.477210689 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.2701010494 + "score":0.1727284585 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.5241051692 + "score":0.4250344787 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.3181656056 + "score":0.1817744295 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.5907906511 + "score":0.4656549066 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2670924013 + "score":0.2373174322 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5198891912 + "score":0.4521152897 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.3597766713 + "score":0.2962197342 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.6081806669 + "score":0.544285644 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"wo", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0741464388 + "score":0.0678548322 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.2660826012 + "score":0.1995976377 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0695734356 + "score":0.0410008999 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.2997632689 + "score":0.1410775666 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.2653942694 + "score":0.1807443545 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.4893923691 + "score":0.4282740606 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1157826458 + "score":0.1030562145 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1765005496 + "score":0.1694260317 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.1 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.1376742076 + "score":0.0705660888 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.3734228567 + "score":0.1999709116 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.0864151864 + "score":0.005606616 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.4104899998 + "score":0.0791302868 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.2 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.1 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0937886749 + "score":0.0146140319 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.3196766983 + "score":0.1546761245 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.1093519063 + "score":0.0066574272 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.3224056963 + "score":0.0720455741 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-saba", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.2204568545 + "score":0.1686505919 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.4894165826 + "score":0.4407452421 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1569463992 + "score":0.1561257665 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.232785021 + "score":0.2413348415 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"arc", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2703289724 + "score":0.2391219094 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5473102513 + "score":0.4993680631 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.317769874 + "score":0.2546682455 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3676906362 + "score":0.3131256963 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.1 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.2474518428 + "score":0.0409238482 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.5019415605 + "score":0.1759269251 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.19092807 + "score":0.040396663 }, { - "model":"meta-llama\/llama-4-maverick", + "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.4983188666 + "score":0.1387970813 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-saba", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.1327783313 + "score":0.2175149129 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.3636961218 + "score":0.4527759686 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.1557273583 + "score":0.1124798847 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.3515058711 + "score":0.2758121544 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.0375398146 + "score":0.0499477269 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.1986406573 + "score":0.161467557 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0015655622 + "score":0.0061206295 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.0623964125 + "score":0.1175310591 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.4 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.0476085337 + "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.205897506 + "score":0.0682401612 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0110136998 + "score":0.0028802187 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.0688367427 + "score":0.0506386945 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.0927070911 + "score":0.1505032551 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.3300356171 + "score":0.4086092545 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.1909661669 + "score":0.1882561377 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.4391780261 + "score":0.3814760125 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.1548779531 + "score":0.1878091774 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.3887963415 + "score":0.4280751788 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.268706305 + "score":0.2100749947 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.4959259833 + "score":0.3864616183 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.0374286633 + "score":0.093266394 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.276784029 + "score":0.311215609 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1363017113 + "score":0.1039989943 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.3536429421 + "score":0.2907556954 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.106947781 + "score":0.1020177653 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.3169488071 + "score":0.3338664094 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.1479958867 + "score":0.1064323135 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.3761797641 + "score":0.3080022567 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.0312538317 + "score":0.0979045908 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.2922637643 + "score":0.3174950846 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.0234894436 + "score":0.0117862293 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.2225963414 + "score":0.1847940791 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.2171579973 + "score":0.2573591397 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.456713607 + "score":0.4309874046 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.1681044686 + "score":0.0851522303 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.3440820027 + "score":0.2284178182 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.0298418752 + "score":0.0818689903 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.2580610439 + "score":0.2636451344 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.041095966 + "score":0.0746797577 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.2930729253 + "score":0.3147526037 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.0556571943 + "score":0.0989261118 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.3548784075 + "score":0.3536619814 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.066482798 + "score":0.1746758677 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.3295363828 + "score":0.3604842775 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.1569501012 + "score":0.1908291186 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.4039420627 + "score":0.4048132215 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.1151073387 + "score":0.0320570973 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.3126448605 + "score":0.1921106676 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.095240952 + "score":0.1554049163 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.3840548344 + "score":0.3557989532 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.1876367188 + "score":0.2229681692 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.4310988737 + "score":0.3722031872 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.1916153649 + "score":0.2130798288 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.4266920518 + "score":0.3738480621 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.0532026402 + "score":0.1589337472 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.2442653709 + "score":0.4334432444 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.0233838479 + "score":0.037561576 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.2071232952 + "score":0.1318206471 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.0030055044 + "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.1680771697 + "score":0.1217595648 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.1169562212 + "score":0.210101514 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.4547809891 + "score":0.4301134482 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.2167266047 + "score":0.362165784 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.4629060689 + "score":0.5625700474 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.2546126219 + "score":0.2836619572 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.4840060449 + "score":0.5189927538 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.3171866034 + "score":0.3481484827 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.5752285995 + "score":0.5703115876 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.1236158233 + "score":0.2310257801 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.3922493462 + "score":0.4431650209 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.1783375751 + "score":0.2968021074 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.4003787241 + "score":0.4607855577 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.3642023499 + "score":0.4358717425 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.5697992815 + "score":0.6055153523 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.4959810553 + "score":0.6254747881 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7232313255 + "score":0.7898596498 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.1732534835 + "score":0.2211802733 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.4434970776 + "score":0.4673850088 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3605235101 + "score":0.3141247128 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.5849733787 + "score":0.5664089061 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.0868573088 + "score":0.1378005544 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.3068232268 + "score":0.3634410941 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.0883262705 + "score":0.1662541754 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.3294670602 + "score":0.3656154799 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.1921595243 + "score":0.2598161419 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.4381909531 + "score":0.4673557809 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.2102834142 + "score":0.2352273865 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.503934087 + "score":0.5423645035 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.1763652726 + "score":0.2637885864 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.4428784232 + "score":0.50661739 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.3772793055 + "score":0.3898419239 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.5820724576 + "score":0.5983170279 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0067893116 + "score":0.030172991 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.2007893146 + "score":0.1791853335 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0132875082 + "score":0.0012644122 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.1300679396 + "score":0.0541809315 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.1581139234 + "score":0.1879687767 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.3992847318 + "score":0.3575768224 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.1006122628 + "score":0.1193068232 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.3529986856 + "score":0.3258076554 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.0310807341 + "score":0.0424529379 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.2470826922 + "score":0.205545596 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.0634023566 + "score":0.0315047557 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.2955476351 + "score":0.1862281652 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.2788689746 + "score":0.2931006661 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5417455941 + "score":0.5117710763 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.2369610218 + "score":0.3454885653 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.3765795877 + "score":0.5632832845 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.1094117889 + "score":0.1423442665 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.3715115564 + "score":0.3938289086 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.0627224628 + "score":0.0479071398 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.3120135336 + "score":0.2390532358 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.1148528139 + "score":0.1466679693 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.3969632133 + "score":0.4020226017 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.2004789157 + "score":0.2296543368 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.4541545495 + "score":0.4623290904 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.0855471394 + "score":0.1846129963 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.3720740561 + "score":0.4218789485 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.284365864 + "score":0.2363785743 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.589202199 + "score":0.5715726858 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.0344910359 + "score":0.042005649 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.2176253825 + "score":0.1704312564 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.0020930717 + "score":0.0060037968 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.0498013123 + "score":0.1052387436 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.0788515324 + "score":0.1206947602 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.3023201397 + "score":0.2893596175 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.0177354807 + "score":0.0376635554 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.2429763441 + "score":0.2154020665 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.1849581121 + "score":0.203133363 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.4562979327 + "score":0.4421827582 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.2591551222 + "score":0.2636104621 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5311194854 + "score":0.5093890816 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.1462066826 + "score":0.1660483895 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4299215293 + "score":0.4317128373 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.2371087689 + "score":0.2448276505 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4064089202 + "score":0.4102738917 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.1102793601 + "score":0.1159703103 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.3718502317 + "score":0.3116167676 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.0999035402 + "score":0.0803083214 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.3664761129 + "score":0.3001867634 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.0537848954 + "score":0.1346426707 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.3139411656 + "score":0.3961827686 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.0666822222 + "score":0.138703676 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.3622322436 + "score":0.4106570721 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.0753739979 + "score":0.0501718274 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.3445771251 + "score":0.2070022512 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0014765966 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.1701733674 + "score":0.0534187009 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.104202131 + "score":0.1342281856 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.3590734072 + "score":0.3526105747 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.0705324379 + "score":0.146553268 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.3025321109 + "score":0.3291051456 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.1107444823 + "score":0.1737342381 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.3916459404 + "score":0.4232883693 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.1649626358 + "score":0.2383832092 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.2656552119 + "score":0.33001113 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.0539171508 + "score":0.0707102369 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.2436825008 + "score":0.2324042355 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.1581276083 + "score":0.1164466909 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.1658903033 + "score":0.2572289084 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.4217933103 + "score":0.5125685183 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.0803859812 + "score":0.1223672825 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.3220461814 + "score":0.3023512099 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mai", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.1018185799 + "score":0.2098940087 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.3881585962 + "score":0.443017936 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.0840554004 + "score":0.0762167285 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.3260852936 + "score":0.2552721118 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.036255172 + "score":0.0777950532 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.2361171448 + "score":0.2573049595 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.0369324798 + "score":0.0666322315 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.3426601677 + "score":0.3626993592 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.1675392326 + "score":0.2059798463 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.4161590898 + "score":0.4231363675 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.101823454 + "score":0.2059172406 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.3124446375 + "score":0.4148312305 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.1012625471 + "score":0.2216993022 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.3580813711 + "score":0.4221894818 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.0891010327 + "score":0.1386303624 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.3158325956 + "score":0.3369477219 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.1211248924 + "score":0.2392636803 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.389246098 + "score":0.4545241599 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.2367123999 + "score":0.3632780792 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.5273473365 + "score":0.6254141203 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.1262296798 + "score":0.0217828279 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.3480250641 + "score":0.144054989 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.104091386 + "score":0.0351771663 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.3135377948 + "score":0.2287244941 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.1488007297 + "score":0.1464596557 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.4132412315 + "score":0.3564056311 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.0478735067 + "score":0.1517063855 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.3069618299 + "score":0.4352500122 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.1523025562 + "score":0.2230448991 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.4096891017 + "score":0.4584787016 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.2469695748 + "score":0.2531295878 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.524876157 + "score":0.5302964071 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0369494139 + "score":0.0696320569 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.2350129808 + "score":0.2331307278 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0015518794 + "score":0.0252073886 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.1140211549 + "score":0.1703033014 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0103766134 + "score":0.0276939955 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.1821363344 + "score":0.1837469296 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0003856632 + "score":0.0160221908 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.0985339751 + "score":0.2144565152 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.1279029727 + "score":0.083347512 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.3504496172 + "score":0.2190646209 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.1158076498 + "score":0.0679191643 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.3583374616 + "score":0.2873941526 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3550414512 + "score":0.3647734864 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.5626107823 + "score":0.5784168493 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.2784963846 + "score":0.3897396366 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.4121299981 + "score":0.5030239884 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.1049411882 + "score":0.1850936564 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.371724232 + "score":0.3999928464 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.2126550777 + "score":0.2726874239 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.4754992095 + "score":0.4948927457 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.1813353123 + "score":0.2070601418 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.4632560004 + "score":0.4601106145 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.3450201321 + "score":0.412349088 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.5827805827 + "score":0.6102742767 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.1323104842 + "score":0.2142629544 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.3747307468 + "score":0.4499459763 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.2733723845 + "score":0.3895465667 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.5057937589 + "score":0.5708848992 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.164734586 + "score":0.1710045162 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4400610126 + "score":0.433047449 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.2884407046 + "score":0.2961144006 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5338739518 + "score":0.5257430939 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.0202859007 + "score":0.057729338 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.2084128437 + "score":0.2177957601 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.0129709626 + "score":0.0230572611 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.1407028363 + "score":0.16449999 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.0476641683 + "score":0.0484864486 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.1691869095 + "score":0.1952223401 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.0111247819 + "score":0.0005975301 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.151377306 + "score":0.1227717162 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.0307653909 + "score":0.0165865489 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.2382457281 + "score":0.1532087128 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.0240096696 + "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.154444722 + "score":0.1038227782 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.0116292791 + "score":0.0457227327 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.1897831748 + "score":0.2051868353 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.000876482 + "score":0.0141586748 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.066397943 + "score":0.1192333436 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.0221364496 + "score":0.0421597981 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.2036637198 + "score":0.1665679168 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.0300378344 + "score":0.0011074127 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.146034089 + "score":0.1483863351 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.1551096033 + "score":0.1814266299 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.4297549368 + "score":0.4404144211 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.200397515 + "score":0.2821064012 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.4351193348 + "score":0.492442613 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.0631852964 + "score":0.1500059372 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.3127999721 + "score":0.385325025 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.0328870671 + "score":0.0190556256 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.285042966 + "score":0.201727165 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.1749499193 + "score":0.2063172621 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.4691275614 + "score":0.4245629061 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.2454574882 + "score":0.2871245672 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.5348019826 + "score":0.5575367366 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.1325294802 + "score":0.1106429776 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.4051925402 + "score":0.3246322884 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.1631216823 + "score":0.1586725311 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.4696161488 + "score":0.4486820539 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.1338729952 + "score":0.0861902503 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.3640492116 + "score":0.2737902674 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.1115670494 + "score":0.1910130331 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.4303510763 + "score":0.4159297845 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.2374723306 + "score":0.2631133201 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.4675485501 + "score":0.504043761 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.2528444882 + "score":0.2496114121 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.4794045124 + "score":0.4707696336 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.00699528 + "score":0.0528559098 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.2382738034 + "score":0.2067746551 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.0018437478 + "score":0.0275980154 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.2225002567 + "score":0.1880963665 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.1140490825 + "score":0.1533969949 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.3668015685 + "score":0.3718867563 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.1317498141 + "score":0.2858310833 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.2770372268 + "score":0.4323129392 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.0342955291 + "score":0.0536249593 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.2066101372 + "score":0.190207113 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.0101983319 + "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.0539238863 + "score":0.0308454815 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"tr", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.1209396556 + "score":0.1776801562 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.3409074931 + "score":0.4036600408 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.270580586 + "score":0.2335702423 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.537606561 + "score":0.4992388897 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.1285741979 + "score":0.1788374332 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.4057727321 + "score":0.4671147568 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.1865914948 + "score":0.2348003993 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.4565231191 + "score":0.4786253942 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"umb", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.2 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0177160721 + "score":0.0150184554 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.1342994379 + "score":0.1309010161 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0010895392 + "score":0.0010839978 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.0283042279 + "score":0.0719045729 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.1155003818 + "score":0.1854021136 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.3250077925 + "score":0.4258067424 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.1195513435 + "score":0.1388133394 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.3158904676 + "score":0.3685419874 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.0676473408 + "score":0.1877241389 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.2672641675 + "score":0.4060591516 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.0166808106 + "score":0.1165785177 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.2361978954 + "score":0.4074870036 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.1452425625 + "score":0.2204611632 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.3941195385 + "score":0.4366931331 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.2680553268 + "score":0.2957932526 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.5055559664 + "score":0.5127606293 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0218129891 + "score":0.0530966299 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.2316696377 + "score":0.183305815 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0005135911 + "score":0.0017326575 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.0644762753 + "score":0.1225688999 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.0721904827 + "score":0.1059358827 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.3322122834 + "score":0.3291955196 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.0963078281 + "score":0.0116334446 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1506583582 + "score":0.0643182856 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.0658670408 + "score":0.0665909516 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.2264957148 + "score":0.2275785677 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.0240888197 + "score":0.0230978994 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.1589846026 + "score":0.1160311087 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.2 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0361610953 + "score":0.0299390587 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.2209617429 + "score":0.1474455997 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0152526027 + "score":0.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.1066841292 + "score":0.0919549448 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.1360263411 + "score":0.1503741808 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.4135302369 + "score":0.3985964495 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1427052583 + "score":0.1755859315 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2190638456 + "score":0.2407951689 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1516445239 + "score":0.1386896901 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.4517979691 + "score":0.413832278 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2058198052 + "score":0.2503925306 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.2799124898 + "score":0.3045084897 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.0158837296 + "task":"mmlu", + "metric":"accuracy", + "score":0.3 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"translation_from", - "metric":"chrf", - "score":0.2111229219 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zu", - "task":"translation_to", "metric":"bleu", - "score":0.0440027048 + "score":0.0365820579 }, { - "model":"microsoft\/phi-4", + "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.1856579938 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.1381244544 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.3106084366 + "score":0.1964572986 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"aeb", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.1319133586 + "score":0.0044447951 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"aeb", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.2261398681 + "score":0.1846817289 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"af", - "task":"classification", + "model":"mistralai\/mistral-small-3.1-24b-instruct", + "bcp_47":"zu", + "task":"truthfulqa", "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 + "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.5 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.0101349522 + "score":0.0371320408 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.0727946226 + "score":0.1862601893 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0087636854 + "score":0.0232552001 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.0326918009 + "score":0.1816122083 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.0143438883 + "score":0.0762582721 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.0651139855 + "score":0.2256183152 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0155010137 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.0145699741 + "score":0.111632655 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.1077126314 + "score":0.1105492032 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.3303312588 + "score":0.3824462343 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.0568249639 + "score":0.2049615052 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.1985159581 + "score":0.4519234477 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.1428907436 + "score":0.1985490849 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.3107041775 + "score":0.4116485218 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.1849770017 + "score":0.3336371818 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.2932088535 + "score":0.5253002356 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.0246413933 + "score":0.0933745535 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.2220905764 + "score":0.3852051191 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.0462912201 + "score":0.112918589 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.2082310898 + "score":0.3403937393 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.021812522 + "score":0.1051779987 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.1700035697 + "score":0.3391065166 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.0687018163 + "score":0.1966274075 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.213092048 + "score":0.3960585372 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.5 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"as", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.0516867052 + "score":0.0575768902 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.2165108464 + "score":0.3040332139 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.0097728449 + "score":0.0290897017 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.1483692036 + "score":0.2669483396 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.08262787 + "score":0.1716262856 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.281005553 + "score":0.4261716241 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.0481979333 + "score":0.1464539147 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.2232523474 + "score":0.3455385109 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.0805820584 + "score":0.0835422268 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.2555758551 + "score":0.3248882933 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.0296286693 + "score":0.0978478358 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.1319853113 + "score":0.3301671275 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"be", - "task":"mgsm", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"bho", + "task":"arc", "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.0173366455 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.220616462 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.0065148659 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.1526491803 + "score":0.7 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.0459721625 + "score":0.1701727662 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.2229551601 + "score":0.4313256486 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.0233985631 + "score":0.122889461 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.1665184954 + "score":0.289913907 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bm", - "task":"classification", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"bn", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.0355167863 + "score":0.1281858401 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.2600874171 + "score":0.3858938936 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.0323184525 + "score":0.125191978 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.1970289791 + "score":0.3696701209 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ca", - "task":"classification", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ceb", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 + "score":0.8 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.0892751266 + "score":0.2520982183 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.2388608153 + "score":0.4444681724 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.0416926889 + "score":0.3102736093 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.1552666429 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.5 + "score":0.5214898195 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ckb", - "task":"mgsm", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"cs", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.020551822 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.1292684598 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.0744822177 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.0527597248 + "score":0.2303251904 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.3026154166 + "score":0.450613459 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.0777044688 + "score":0.2426993481 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.3081482084 + "score":0.4712451818 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.1237340737 + "score":0.2626874911 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.3601104142 + "score":0.4988486171 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.1371705946 + "score":0.3809175562 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.4120757797 + "score":0.6066039572 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.0837672025 + "score":0.1697631286 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.2648038016 + "score":0.3535802564 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.0885028071 + "score":0.2098247736 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.2007646735 + "score":0.417434594 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.3548422361 + "score":0.399751444 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.460765953 + "score":0.5723628973 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.4690424472 + "score":0.464784706 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.6788013861 + "score":0.6741611276 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.1169662945 + "score":0.2233867986 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.3242693179 + "score":0.4625939523 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.2556403143 + "score":0.361048469 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.4583071754 + "score":0.5667561181 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.0271486292 + "score":0.1859289486 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.2182731449 + "score":0.4402215767 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.0526574176 + "score":0.0783879247 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.2424108963 + "score":0.2777670309 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.0762125847 + "score":0.2111316415 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.2228549327 + "score":0.4343589207 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.0269063649 + "score":0.1904775276 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.2401408344 + "score":0.4788196159 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.0839707225 + "score":0.2014604354 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.3074010094 + "score":0.4639286173 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.2478840637 + "score":0.3927528149 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.4205657928 + "score":0.5874921326 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0290727628 + "score":0.0768136914 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.1841843114 + "score":0.1865808917 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0523495621 + "score":0.0163860397 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.1231670583 + "score":0.1761153537 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.0005257422 + "score":0.1323783916 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.0721265952 + "score":0.3850340086 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.000262224 + "score":0.1060744828 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.0118348356 + "score":0.3580675535 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.0318398305 + "score":0.0737222138 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.1834830244 + "score":0.2500218213 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.0320718253 + "score":0.0721259007 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.1032515167 + "score":0.2931833463 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.1989310744 + "score":0.3206299694 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.408792844 + "score":0.5380529839 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.2107266229 + "score":0.292022826 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.3722535388 + "score":0.5099805952 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hne", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.0223204074 + "score":0.1310248624 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.1959765545 + "score":0.4057643378 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.022115131 + "score":0.0541491102 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.1882969266 + "score":0.263637102 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ht", - "task":"classification", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"hu", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.1149995432 + "score":0.2005919962 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.3052316233 + "score":0.4635609134 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.1017437337 + "score":0.2668215975 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.2651695911 + "score":0.4987236442 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.1251179936 + "score":0.1674187488 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.3078536626 + "score":0.4422221563 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.1049757961 + "score":0.2922430013 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.338086632 + "score":0.5510140576 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.0185191424 + "score":0.0555362323 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.1790132896 + "score":0.236381065 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.0151653031 + "score":0.0170051195 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.0537338226 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.7 + "score":0.1450260585 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ilo", - "task":"mgsm", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"it", + "task":"arc", "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.0341024751 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.2126115238 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.0221151729 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.1431429685 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.1041933329 + "score":0.1928019801 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.3064701129 + "score":0.4582860792 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.1299185029 + "score":0.2733663358 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.3570513672 + "score":0.5231933614 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.1170990874 + "score":0.1843295265 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.3281623219 + "score":0.4175841484 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.1166577127 + "score":0.247062292 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.2303280443 + "score":0.3738750801 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.8 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"jv", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.0192945074 + "score":0.1248080013 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.2015068169 + "score":0.3323730185 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.045857499 + "score":0.1222608237 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.1778848232 + "score":0.3755543507 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ki", - "task":"classification", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"kk", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.0278653757 + "score":0.0542399326 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.2309769046 + "score":0.2976203376 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.0264488684 + "score":0.0603448772 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.1618433519 + "score":0.3011538751 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.0187368299 + "score":0.0423245128 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.210610547 + "score":0.2646605638 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.0070803381 + "score":0.0030665166 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.0602951272 + "score":0.1617644115 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.0343738545 + "score":0.142542051 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.1971697601 + "score":0.3941172286 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.012430185 + "score":0.095018815 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.0969965616 + "score":0.3690882139 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.1011791445 + "score":0.1298139392 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.2665626277 + "score":0.3909547555 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.0674482283 + "score":0.2582727386 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.1439352867 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0333812973 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2140071833 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.013803565 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.0856760144 + "score":0.3442557032 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.0857349903 + "score":0.2128159963 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.2682295704 + "score":0.4639121691 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.0363984536 + "score":0.1877293722 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.2133514375 + "score":0.4156064229 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.0831966089 + "score":0.1522348659 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.3252283455 + "score":0.4142435328 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.0100264548 + "score":0.0938946347 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.2049643183 + "score":0.3116778843 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.0402775114 + "score":0.0549301185 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.2132968488 + "score":0.2672873596 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.0178624704 + "score":0.0463756582 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.1584836987 + "score":0.3097498513 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.044306682 + "score":0.1141453782 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.2110608123 + "score":0.3477667157 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.0101250707 + "score":0.1057291821 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.1446641679 + "score":0.3103268517 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.0250471784 + "score":0.1480972279 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.2097577846 + "score":0.3846594696 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.034382114 + "score":0.1588893829 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.2211758055 + "score":0.3218843951 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.120023798 + "score":0.217708728 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.3039131897 + "score":0.449213988 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.1137229069 + "score":0.3422949582 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.3446031673 + "score":0.5811761531 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.1101780964 + "score":0.1184833265 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.2424045636 + "score":0.2686318029 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.0971253665 + "score":0.1103891214 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.1223804901 + "score":0.2937298939 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.0522706053 + "score":0.192171828 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.2509451803 + "score":0.4057435234 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.0404811569 + "score":0.1172467131 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.2098515398 + "score":0.375586286 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.1127735687 + "score":0.1863008756 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.3096427976 + "score":0.4262401563 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.1171995651 + "score":0.2219365699 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.39693057 + "score":0.4997896782 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0249203424 + "score":0.0832085938 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.183758763 + "score":0.2560508851 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0148302605 + "score":0.0245166671 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.1329930306 + "score":0.1971989167 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0171568718 + "score":0.030893556 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.1561109456 + "score":0.1822055745 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0097264241 + "score":0.0023241318 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.0452833915 + "score":0.1765743592 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.5 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.0554840251 + "score":0.0743696949 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.1538079363 + "score":0.3048835131 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.0033288372 + "score":0.0867109239 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.036508675 + "score":0.3141207717 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.7 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"pa", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3055395757 + "score":0.2585423604 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.4480585816 + "score":0.5140115555 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.2165906221 + "score":0.3275813302 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.3271537328 + "score":0.5154143201 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.1017362354 + "score":0.1873357797 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.2782010079 + "score":0.4495400323 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.1041302213 + "score":0.2381984934 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.2865629267 + "score":0.4592277795 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.1031395116 + "score":0.2145991028 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.3223915745 + "score":0.4593715469 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.1194174782 + "score":0.3539022205 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.3618255907 + "score":0.5923278871 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"qu", - "task":"mgsm", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ro", + "task":"arc", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.0901332073 + "score":0.1770834914 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.2638668804 + "score":0.408612856 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.1408494847 + "score":0.429961987 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.2546101322 + "score":0.5972964968 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.0826481083 + "score":0.1435401219 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.2913230821 + "score":0.4105586063 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.1808682916 + "score":0.2886257739 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.3815777762 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.3 + "score":0.5087363637 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"mgsm", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"sd", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0012309971 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.0385987025 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.0093358773 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.0457261214 + "score":0.8 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.0448599501 + "score":0.0740797406 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.1691371082 + "score":0.2542118208 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0601341974 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.0527194634 + "score":0.1994352479 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.0187725283 + "score":0.0285852473 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.1451005114 + "score":0.2160067741 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.0072043177 + "score":0.014651722 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.0622904587 + "score":0.1375629789 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.015192186 + "score":0.0391982932 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.1562018554 + "score":0.1784853107 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.0146518601 + "score":0.0244050078 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.0664358997 + "score":0.2037164659 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.7 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"so", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.0168598973 + "score":0.0803995043 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.1350682776 + "score":0.3185143496 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.0192034206 + "score":0.0756351517 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.1607323446 + "score":0.3153237514 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.0538059584 + "score":0.1706373545 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.2453781212 + "score":0.4266803456 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.0917605905 + "score":0.2642729747 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.2668905804 + "score":0.4811936124 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.0217870696 + "score":0.1231167016 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.2165031068 + "score":0.3066261581 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.0464674805 + "score":0.100703346 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.2235940604 + "score":0.3394192326 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.1007032416 + "score":0.2316592529 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.2719560518 + "score":0.4448279614 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.1266242057 + "score":0.3469148634 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.3334538145 + "score":0.5817285551 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.0422003709 + "score":0.2029959378 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.253591842 + "score":0.4619706712 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.0280729387 + "score":0.266835444 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.1884927612 + "score":0.5363657682 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.0366379898 + "score":0.0974405375 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.1847934746 + "score":0.3189571047 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.0141355453 + "score":0.1013427217 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.1724636201 + "score":0.40431727 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.1550101498 + "score":0.2525105285 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.3623113506 + "score":0.4602158898 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.1217984824 + "score":0.1953888501 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.2801870917 + "score":0.4178007058 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tg", - "task":"classification", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"th", + "task":"arc", "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.0606788965 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.1688995018 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.0007119113 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.0386741345 + "score":0.8 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.0853746951 + "score":0.1343983036 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.3009803927 + "score":0.3632350324 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.0676677726 + "score":0.2056905071 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.2051763344 + "score":0.3721609069 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ti", - "task":"mgsm", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"tr", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0118399471 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.0818929883 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0131103824 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.7 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.1075252941 + "score":0.1967376366 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.2810155518 + "score":0.4299666079 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.0683323294 + "score":0.2677982301 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.2657241512 + "score":0.4953792654 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.0921333598 + "score":0.1687607729 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.2640680177 + "score":0.4201489822 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.1042804602 + "score":0.2722608778 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.257176459 + "score":0.4873229562 }, { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"umb", - "task":"classification", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"ur", + "task":"arc", "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0453126073 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1567880475 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.027790575 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.0855724163 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.096255918 + "score":0.1608191811 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.2490196736 + "score":0.4194174213 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.0759551519 + "score":0.1610922206 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.2765897266 + "score":0.3629853655 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.030810794 + "score":0.1209426537 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.1804383237 + "score":0.3088806755 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.0151653031 + "score":0.1161087561 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.0766086067 + "score":0.3498111478 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.1578714698 + "score":0.1204645669 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.3784433754 + "score":0.3876320563 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.1713340477 + "score":0.1940449441 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.3260532752 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0354904515 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.1880266806 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0178986288 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.0724032398 + "score":0.4193086485 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.0482308543 + "score":0.0921504626 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.2087387992 + "score":0.3200787292 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.0574307954 + "score":0.0569972002 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1098000711 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0118398272 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.1505828307 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.008719744 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.056668863 + "score":0.1268003169 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0200324188 + "score":0.0423997321 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.1553578618 + "score":0.2100045407 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0095466427 + "score":0.0262399026 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.0491504248 + "score":0.1399469356 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.0513648793 + "score":0.1264279499 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.2577830867 + "score":0.3856755463 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.119690435 + "score":0.0962471892 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.1721639976 + "score":0.186203302 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.0821079546 + "score":0.1759566918 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.3164863838 + "score":0.4501489751 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.1265931852 + "score":0.2279432688 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.1793067232 + "score":0.2931038513 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.0115347204 + "score":0.1133170987 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.1485833844 + "score":0.3072773582 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.0093856962 + "score":0.0779267738 }, { - "model":"microsoft\/phi-4-multimodal-instruct", + "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.0608458885 + "score":0.3132146793 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"aeb", - "task":"classification", + "model":"openai\/gpt-3.5-turbo-0613", + "bcp_47":"zu", + "task":"truthfulqa", "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.1520421573 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.3707336059 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.0325845731 + "score":0.4 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.1866364833 + "model":"openai\/gpt-4.1", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"af", + "model":"openai\/gpt-4.1", + "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ak", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ak", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.049235994 + "score":0.0878643961 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.1875704973 + "score":0.2943661311 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0203716729 + "score":0.0773692656 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.1964052359 + "score":0.3411692596 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.0457848104 + "score":0.1621142099 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.1635760551 + "score":0.4428263457 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0043114209 + "score":0.1504890085 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.0733956093 + "score":0.2959385484 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.0844832543 + "score":0.2484834927 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.2899357726 + "score":0.5412047755 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.0202296618 + "score":0.2278733475 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.1170466993 + "score":0.4958526675 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.1314009634 + "score":0.265308921 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.3827163755 + "score":0.534633443 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.0953897712 + "score":0.4133349725 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.2894343613 + "score":0.6096991153 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.0542604747 + "score":0.1444098549 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.2839533373 + "score":0.4354187609 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.0103488851 + "score":0.2001187188 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.1022931459 + "score":0.4423462053 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.1005104859 + "score":0.2150495101 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.3241333261 + "score":0.4509543639 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.0397410561 + "score":0.2997633261 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.1986373033 + "score":0.4845814777 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.0370753847 + "score":0.2491524665 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.2466649661 + "score":0.4859498791 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.0104943059 + "score":0.138136017 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.1703228075 + "score":0.3570715701 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.1181763987 + "score":0.3489496041 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.3010924314 + "score":0.5581870497 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.0774366468 + "score":0.1361506132 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.232244564 + "score":0.3888910906 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.0700327695 + "score":0.1613185111 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.2793919522 + "score":0.4197419896 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.04780178 + "score":0.1539862054 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.1986318307 + "score":0.4377056399 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"be", - "task":"classification", + "model":"openai\/gpt-4.1", + "bcp_47":"bho", + "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.0761832692 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.3293090829 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.0940299872 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.3022965125 - }, - { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.1421502617 + "score":0.2501533038 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.3249063292 + "score":0.5157072708 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.044984749 + "score":0.1995636011 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.1531327249 + "score":0.4138783532 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bm", - "task":"classification", + "model":"openai\/gpt-4.1", + "bcp_47":"bn", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.1016298945 + "score":0.3114258781 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.2980803254 + "score":0.5457791444 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.0978160022 + "score":0.3254574442 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.2190252958 + "score":0.5346247566 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ca", - "task":"classification", + "model":"openai\/gpt-4.1", + "bcp_47":"ceb", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.0784827192 + "score":0.399240538 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.3301673127 + "score":0.6190587277 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.0942154389 + "score":0.3751238401 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.3318581823 + "score":0.6143783892 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ckb", - "task":"mgsm", + "model":"openai\/gpt-4.1", + "bcp_47":"cs", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.0227564483 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.1618637003 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0041151275 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.052321141 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.1376994092 + "score":0.3217774713 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.3446006208 + "score":0.5782452692 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.1126650404 + "score":0.4138800821 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.3592815418 + "score":0.6092456527 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.1786073211 + "score":0.3464595133 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.418923403 + "score":0.5867734529 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.1529904036 + "score":0.477316562 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.4257110482 + "score":0.6811707635 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.0881428767 + "score":0.2881859392 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.2731602409 + "score":0.5283141363 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.1667346071 + "score":0.2909203719 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.3197259125 + "score":0.5023060375 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.3166864072 + "score":0.492992017 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.5483508218 + "score":0.6758612579 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.4404172544 + "score":0.6212437369 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7231001513 + "score":0.8160680265 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.1298121807 + "score":0.3073587665 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.3403579227 + "score":0.5589719771 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3743863952 + "score":0.4007144936 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.5971283997 + "score":0.6428162124 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.1016894588 + "score":0.3047473913 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.3636401028 + "score":0.541913317 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.1695199459 + "score":0.2033525098 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.3986853323 + "score":0.4483666995 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.1164300835 + "score":0.3915612434 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.3215620941 + "score":0.6080295028 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.1452350029 + "score":0.3199141865 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.4128118494 + "score":0.588811105 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.1078563354 + "score":0.301906911 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.3207926618 + "score":0.5589917916 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.3659011486 + "score":0.4922424861 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.580998869 + "score":0.6881055928 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0208055886 + "score":0.0362803832 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.1775856129 + "score":0.2412638087 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0303702553 + "score":0.0590184507 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.1647346597 + "score":0.2182867648 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.0897802232 + "score":0.293318961 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.3334021167 + "score":0.5302757414 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.0546926081 + "score":0.1439576296 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.2631919591 + "score":0.4388678133 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.038636598 + "score":0.2178256702 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.1770095402 + "score":0.4424716551 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.0050909961 + "score":0.2443077504 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.1811657432 + "score":0.5192699912 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.1271878224 + "score":0.3513424619 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.3698932868 + "score":0.5811151557 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.1324625901 + "score":0.3856423281 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.3233634009 + "score":0.6039414456 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.0809118708 + "score":0.2481480247 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.2537010038 + "score":0.508486097 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.022242601 + "score":0.1531335794 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.2086622767 + "score":0.3911815819 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ht", - "task":"classification", + "model":"openai\/gpt-4.1", + "bcp_47":"hu", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.1216394809 + "score":0.2876998483 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.4062520998 + "score":0.5628772937 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.1488006127 + "score":0.3576201412 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.3814897068 + "score":0.6153922032 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.1489627056 + "score":0.3584275831 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.4172638299 + "score":0.6013859082 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.2008824981 + "score":0.3499912941 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.5185852751 + "score":0.6402792518 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.5 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.0505138835 + "score":0.2060192505 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.1914888261 + "score":0.4844534641 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.0070198993 + "score":0.1961860496 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.1690394526 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.8 + "score":0.4729428536 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ilo", - "task":"mgsm", + "model":"openai\/gpt-4.1", + "bcp_47":"it", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.0982399037 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.2845447958 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.0238061486 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.2070418144 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.1148524922 + "score":0.2912028765 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.3722842281 + "score":0.5457371537 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.250138544 + "score":0.3511828155 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5338430631 + "score":0.6085037742 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.1169165949 + "score":0.2166158629 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.3638899173 + "score":0.5075364476 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.1330552123 + "score":0.2931058111 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.2685952079 + "score":0.4502993046 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.0823011221 + "score":0.3528030853 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.2825939861 + "score":0.5716064196 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.0327082346 + "score":0.2391235505 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.2319054893 + "score":0.5839078959 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ki", - "task":"classification", + "model":"openai\/gpt-4.1", + "bcp_47":"kk", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 + "model":"openai\/gpt-4.1", + "bcp_47":"kk", + "task":"classification", + "metric":"accuracy", + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"kk", - "task":"classification", + "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"kk", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.044667859 + "score":0.2190440582 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.2691000298 + "score":0.4908662007 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.0732644907 + "score":0.3214797925 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.3296633392 + "score":0.5762282439 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.0353609299 + "score":0.2706398193 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.1909025949 + "score":0.5473511459 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.0077167113 + "score":0.1856132097 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.1386174808 + "score":0.3901344593 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.1165534681 + "score":0.2760595824 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.3877914341 + "score":0.523164531 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.1033665849 + "score":0.3130393907 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.3638806009 + "score":0.5247440023 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.1314926141 + "score":0.1776114575 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.3540405018 + "score":0.4741587712 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.1705869429 + "score":0.2067364767 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.2612780395 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0158033007 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.1802186885 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.004737288 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.1665989397 + "score":0.3116379221 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.0968803629 + "score":0.3601446012 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.3271235347 + "score":0.5969111652 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.0742021289 + "score":0.2296100147 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.1811100359 + "score":0.4744292053 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.0830883828 + "score":0.3243424349 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.2590209016 + "score":0.5709461451 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.0473984845 + "score":0.1272378515 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.2714023791 + "score":0.4523606053 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mg", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"mg", + "task":"classification", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.0436065244 + "score":0.2917695916 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.1616079019 + "score":0.5143075365 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.0321839146 + "score":0.2866152436 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.2872152251 + "score":0.5663273613 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.1039755938 + "score":0.3202923873 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.3670583743 + "score":0.5629214829 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.0490353313 + "score":0.2000751863 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.3016997477 + "score":0.4730887312 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.1016737952 + "score":0.247457636 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.3529445259 + "score":0.5269197766 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.0980137705 + "score":0.2884569727 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.3076980329 + "score":0.5016795899 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.2051934522 + "score":0.3751686059 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.4174861616 + "score":0.6197326636 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.1030714956 + "score":0.4401130744 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.3069420156 + "score":0.7145000136 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.1062506996 + "score":0.3303579297 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.3311804385 + "score":0.549332604 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.0661499319 + "score":0.2328260511 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.2408635082 + "score":0.5032600779 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.1305087747 + "score":0.307994769 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.336550146 + "score":0.5645741484 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.0829900967 + "score":0.2521628085 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.3039678683 + "score":0.5130367104 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.1166543201 + "score":0.2706764356 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.3031041679 + "score":0.5187692381 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.200304354 + "score":0.3256395629 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.4780661009 + "score":0.6002234371 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.5 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0265108253 + "score":0.2193020818 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.1166554461 + "score":0.4750942093 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0293332904 + "score":0.1102557203 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.1849240696 + "score":0.4682292826 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0177577979 + "score":0.1206079965 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.1411822431 + "score":0.3822546587 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0101970078 + "score":0.0450954747 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.1613941454 + "score":0.3804747142 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.6 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.0446786865 + "score":0.2759362863 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.1562050743 + "score":0.521953003 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.0022538162 + "score":0.1651830786 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.0960982382 + "score":0.4234486928 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3158857772 + "score":0.4189225146 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.5109022919 + "score":0.6647373749 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.2903521386 + "score":0.4787138393 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.4758823803 + "score":0.6097836343 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.1198559998 + "score":0.2802625681 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.3492711529 + "score":0.5439670195 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.2480316528 + "score":0.3443088412 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.4488014348 + "score":0.5816388936 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.1398701241 + "score":0.3203407715 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.3034565852 + "score":0.5533544406 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.3460979115 + "score":0.4490877978 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.5835851988 + "score":0.6695132668 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"qu", - "task":"mgsm", + "model":"openai\/gpt-4.1", + "bcp_47":"ro", + "task":"arc", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.1130151873 + "score":0.2794082054 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.3818050844 + "score":0.5408123233 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.1838222494 + "score":0.4984350217 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.416344125 + "score":0.6700105545 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.0947682488 + "score":0.2317861129 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.3259777135 + "score":0.5199717777 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.1796274314 + "score":0.3286463098 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.4360781177 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.7 + "score":0.5704087395 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"mgsm", + "model":"openai\/gpt-4.1", + "bcp_47":"sd", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0300958323 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.2021438397 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.0097834933 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.1744531846 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.0299661217 + "score":0.3269082527 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.204544657 + "score":0.5720782047 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.0009238366 + "score":0.2945581276 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.0843772457 + "score":0.4770478865 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.031724087 + "score":0.2237147063 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.16451202 + "score":0.4928025786 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.0046124791 + "score":0.2213737985 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.1085913002 + "score":0.4089512188 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.017437495 + "score":0.0991434845 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.142035614 + "score":0.3451095887 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.0146532378 + "score":0.1515589229 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.1923028552 + "score":0.5015201773 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.0516240546 + "score":0.2407783488 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.153428686 + "score":0.4696462601 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.0121002424 + "score":0.1992814962 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.1697462625 + "score":0.4739682422 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.115711536 + "score":0.2962617057 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.3597872407 + "score":0.5718773299 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.1694354423 + "score":0.3246138439 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.4167060912 + "score":0.539231236 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.0407632458 + "score":0.2138222548 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.2238900502 + "score":0.4678880839 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.0714251247 + "score":0.1812913523 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.2764333203 + "score":0.4862460633 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.2208693059 + "score":0.2736390873 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.4679683611 + "score":0.5516496981 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.3234795754 + "score":0.3409932056 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.5608576982 + "score":0.6325116451 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.0669163701 + "score":0.3084306564 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.2784916366 + "score":0.5446161895 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.0633186191 + "score":0.4087794747 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.3074668268 + "score":0.669062824 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.0917433239 + "score":0.2266585274 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.3851148557 + "score":0.5043938863 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.1299606269 + "score":0.2800009794 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.3428948363 + "score":0.5340783161 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.1471870965 + "score":0.3852899552 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.3392441061 + "score":0.6247940844 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.2060450795 + "score":0.3365460818 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.3717773766 + "score":0.5508261106 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tg", - "task":"classification", + "model":"openai\/gpt-4.1", + "bcp_47":"th", + "task":"arc", "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.0594528699 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.1988219607 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.1247215313 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.1079848157 + "score":0.2949417989 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.2801778291 + "score":0.5355554723 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.1131365873 + "score":0.3946124626 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.2713637811 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.1 + "score":0.5531143677 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ti", - "task":"mgsm", + "model":"openai\/gpt-4.1", + "bcp_47":"tr", + "task":"arc", "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0118794667 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.1249552242 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0507921341 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.1097631082 + "score":0.2730640179 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.3352988316 + "score":0.539343275 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.0795019275 + "score":0.367297377 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.2884455353 + "score":0.6209268292 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.0837893895 + "score":0.2800024381 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.2809963487 + "score":0.5593725229 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.2215012201 + "score":0.3549515665 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.4551853935 + "score":0.5522777328 }, { - "model":"mistralai\/mistral-nemo", - "bcp_47":"umb", - "task":"classification", + "model":"openai\/gpt-4.1", + "bcp_47":"ur", + "task":"arc", "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0345153294 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1364890072 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0012941396 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.1092334478 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.0522713846 + "score":0.2068585944 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.3192866676 + "score":0.5050627139 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.0868686952 + "score":0.2927501641 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.2859772299 + "score":0.484706219 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.0689558305 + "score":0.2209837875 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.22853185 + "score":0.4853024301 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.0415402981 + "score":0.2062980634 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.2227329297 + "score":0.5064032134 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.0649160569 + "score":0.2809055533 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.2830042558 + "score":0.5421068577 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.0350138164 + "score":0.4180012555 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.2206802597 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0588767323 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2123528181 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0134096062 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1594472691 + "score":0.6302564473 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.0908263331 + "score":0.2097645573 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.3394219762 + "score":0.4732281256 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.119486019 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.0645497034 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0478741208 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.1576256072 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0235106256 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.1852540612 + "score":0.1666195088 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.1 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0235724586 + "score":0.1280239382 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.1570768217 + "score":0.360470667 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0152437624 + "score":0.0762109546 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.1214492647 + "score":0.2828209251 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.0740063452 + "score":0.1794305621 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.2757375638 + "score":0.4543396215 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.0872564614 + "score":0.1841660038 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.1427963743 + "score":0.2628923071 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.0992800287 + "score":0.2064136736 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.324429867 + "score":0.5051800847 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.09185491 + "score":0.2731019968 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.1769207611 + "score":0.3107160924 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":0.3 + "score":1.0 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.0494841031 + "score":0.278809167 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.1999996494 + "score":0.5407280723 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.0196415161 + "score":0.266135659 }, { - "model":"mistralai\/mistral-nemo", + "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.1910730769 + "score":0.5629331219 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.2867784698 + "score":0.2198316321 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.5037863792 + "score":0.4708151995 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.2421610142 + "score":0.2234579509 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.4148106883 + "score":0.4401488964 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ak", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.1 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.0787707917 + "score":0.1056657743 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.218746848 + "score":0.3249231698 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0187531501 + "score":0.0618207736 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.1112723085 + "score":0.340632844 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.3 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.0462314764 + "score":0.1521950168 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.1434072436 + "score":0.3956387285 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0132821079 + "score":0.0953768122 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.0456848091 + "score":0.2497030659 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.2054466179 + "score":0.2413546506 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.4635698598 + "score":0.5235234652 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.2513408047 + "score":0.2271910382 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.5078766295 + "score":0.4908497482 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.278999196 + "score":0.2566573338 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5072892325 + "score":0.5183862763 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.335915232 + "score":0.3901123396 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5453940527 + "score":0.5771753105 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.1240264763 + "score":0.1301518556 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.3914345538 + "score":0.4290918442 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1767788852 + "score":0.1797566847 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.3838449247 + "score":0.4271851106 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.2249154291 + "score":0.1724971212 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.4283700551 + "score":0.4012455839 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.2483121176 + "score":0.2496844101 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.4611825726 + "score":0.4720007075 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.1368466985 + "score":0.1595843783 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.3988973343 + "score":0.4348621346 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.0337104112 + "score":0.1540181476 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.2239889311 + "score":0.351822758 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.2836423323 + "score":0.2979811644 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.4827693819 + "score":0.5341221534 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.1511392088 + "score":0.2462308641 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.3858872623 + "score":0.4432887674 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.1923090312 + "score":0.1840798833 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.3973361244 + "score":0.4151816693 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.1398741741 + "score":0.1349305067 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.4023712427 + "score":0.397143235 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"be", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.1262645615 + "score":0.1644448391 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.389349888 + "score":0.4629038808 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.164496362 + "score":0.2550498255 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.42344822 + "score":0.4544854197 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.2246336129 + "score":0.2230536146 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.4406538597 + "score":0.499651958 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.1192868334 + "score":0.2227847146 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.3094007011 + "score":0.40695057 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2557484343 + "score":0.2819696539 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.4816174974 + "score":0.5293077213 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.336565743 + "score":0.3092254935 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.500099888 + "score":0.4661357412 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.1502069597 + "score":0.365645255 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.365503748 + "score":0.5771393179 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.1518861892 + "score":0.3274189601 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.3795025844 + "score":0.5630354446 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.0716255326 + "score":0.1379843601 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.2354287318 + "score":0.3936670775 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.0016024995 + "score":0.1018796158 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.1289259809 + "score":0.3594406238 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.2895060168 + "score":0.2953385985 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.5088283599 + "score":0.5543620654 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.3502843148 + "score":0.342919616 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.5851011111 + "score":0.5729115023 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.33652498 + "score":0.3052927761 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5394051209 + "score":0.5504382993 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4124704223 + "score":0.4246355556 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6500309258 + "score":0.6487523813 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.2440848305 + "score":0.2853090403 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.4482066389 + "score":0.5033746216 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.3683187834 + "score":0.2976764649 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.5300499022 + "score":0.4568078793 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.4669071745 + "score":0.5178458342 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6351301458 + "score":0.6792020066 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.5813419207 + "score":0.6631992536 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.8065247071 + "score":0.8257245236 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.293714449 + "score":0.2800331904 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.4892518335 + "score":0.5328441069 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3400529578 + "score":0.3927902573 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6054518089 + "score":0.628791549 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.2392739698 + "score":0.274202443 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.4813714407 + "score":0.5239221129 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.2321648572 + "score":0.144419277 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.4166342577 + "score":0.405746187 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.2811212879 + "score":0.3517517227 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.4244631944 + "score":0.5678423102 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.2282664087 + "score":0.2861570496 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.5153269959 + "score":0.567530869 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2548863763 + "score":0.2950999056 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5181895957 + "score":0.5638983665 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.4309072933 + "score":0.5061822417 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.6122951839 + "score":0.6903823708 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0365190298 + "score":0.0579371031 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.1781763265 + "score":0.2407036725 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0417146562 + "score":0.0338899407 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.0914072868 + "score":0.1685773285 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.266641943 + "score":0.2249525185 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.4689143537 + "score":0.4726822454 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.1714078748 + "score":0.1646493878 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.45528413 + "score":0.4071725376 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.3 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.0639113657 + "score":0.2293529776 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.1707631202 + "score":0.4521332467 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.0551968249 + "score":0.2554536105 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.165412979 + "score":0.5371463729 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3708164771 + "score":0.3531906075 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5780452995 + "score":0.6060071382 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3889665973 + "score":0.399293733 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5940361548 + "score":0.6132292528 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.261843766 + "score":0.2232750657 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.4806474097 + "score":0.4801269988 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.0995040783 + "score":0.1922860161 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.3935925698 + "score":0.4363534921 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.218205371 + "score":0.2647815263 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.4561374245 + "score":0.5197043469 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.2523642916 + "score":0.3268056763 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.5178642158 + "score":0.5486126608 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.2714445111 + "score":0.2559881532 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5033343062 + "score":0.5349715693 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3367134056 + "score":0.3900018149 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6204206544 + "score":0.6494354052 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.0577998278 + "score":0.1469460203 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.184470268 + "score":0.3801695829 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.0317976664 + "score":0.1926475709 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.1311048104 + "score":0.4187626054 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.097793149 + "score":0.1826483605 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.2519860373 + "score":0.4686350803 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.0683877466 + "score":0.1078652833 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.255829494 + "score":0.421647984 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.26428669 + "score":0.3047636442 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.4902151754 + "score":0.5385736571 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.3239977856 + "score":0.3458314466 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5865126635 + "score":0.5969984451 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2208745982 + "score":0.2100828863 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4944838309 + "score":0.4717405627 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.1755723698 + "score":0.2844229339 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.3733502483 + "score":0.4435245651 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.1125731148 + "score":0.2977682173 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.2778916971 + "score":0.5413323701 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.0861201622 + "score":0.2537598479 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.3310005151 + "score":0.5629521778 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.1995238484 + "score":0.1646050237 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.4335224538 + "score":0.4508391233 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.172304501 + "score":0.2752297553 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.4246105774 + "score":0.5180256955 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.0572277693 + "score":0.2132140468 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.2158207267 + "score":0.5000034068 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.1292151863 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.0812368695 + "score":0.3392182289 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.2415284955 + "score":0.2382712271 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.4761318508 + "score":0.4901100456 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.2506029382 + "score":0.2584591395 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.4820610024 + "score":0.4586627531 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.2390442925 + "score":0.1783139223 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.4660486517 + "score":0.4481556757 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.1889249825 + "score":0.3002017818 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.314876296 + "score":0.3739762238 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.0673075407 + "score":0.0772118618 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.2078406147 + "score":0.2815494636 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0194519734 + "score":0.001488949 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.1011298866 + "score":0.1588971491 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.2922121087 + "score":0.3080966975 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.5037938788 + "score":0.5628489014 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.2113094586 + "score":0.2981973224 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.4865126178 + "score":0.5193764902 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.2362297066 + "score":0.2802761469 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.4746614882 + "score":0.5391751615 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.108285746 + "score":0.2042851472 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.3934483867 + "score":0.4615978684 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.0484302224 + "score":0.243797007 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.2222317379 + "score":0.4981055966 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.0220530515 + "score":0.2038296766 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.2506994166 + "score":0.5453530515 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.2936668736 + "score":0.2649575888 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.4940538554 + "score":0.5072138807 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.1686173343 + "score":0.186903033 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.4033123912 + "score":0.411527522 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.3097940645 + "score":0.2358876365 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5304242832 + "score":0.4961149155 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2168719994 + "score":0.2674122275 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4555868419 + "score":0.4442281313 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.3030951939 + "score":0.3443124421 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.5195567075 + "score":0.5824988714 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.3655435175 + "score":0.3763691574 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.6292737269 + "score":0.6619682382 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.20731642 + "score":0.2596129619 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.414222781 + "score":0.485235691 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.0884087592 + "score":0.2234699025 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.2678188556 + "score":0.4866737746 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.2746115511 + "score":0.3085593402 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.5022730585 + "score":0.5486177789 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.1969099003 + "score":0.2319189577 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.4481037581 + "score":0.4830752425 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.23536401 + "score":0.2639124065 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.4943928771 + "score":0.5166255119 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.3235114454 + "score":0.342163716 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.5875209718 + "score":0.6089275595 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0495608632 + "score":0.158564127 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.2045968087 + "score":0.4086927045 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0294620037 + "score":0.1237632416 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.1680044731 + "score":0.4428640995 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0200154664 + "score":0.1266863364 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.1606834413 + "score":0.3723937215 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0044245595 + "score":0.0507341481 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.1054952984 + "score":0.3372593565 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"or", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"or", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.1001204869 + "score":0.2495546416 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.2836152046 + "score":0.505250418 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.0445164582 + "score":0.1370990235 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.2337334441 + "score":0.4188964845 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4077844252 + "score":0.3898113091 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6076754833 + "score":0.6349932626 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4459003493 + "score":0.4218934881 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5840266721 + "score":0.5699211354 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.2148436144 + "score":0.2238263799 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.4670207413 + "score":0.5157447202 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.2898684366 + "score":0.3376407171 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.525989117 + "score":0.5674744623 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.264907032 + "score":0.2989764302 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.4939362461 + "score":0.5575461672 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4563355662 + "score":0.4347143661 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6843169799 + "score":0.6526848356 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.258396409 + "score":0.2365245444 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.4912290692 + "score":0.5263187531 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.4713411152 + "score":0.4295443245 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.6517904546 + "score":0.6303158648 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.1955652432 + "score":0.2156464838 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4654058492 + "score":0.4961661832 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.2591393679 + "score":0.3525318267 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.4936043335 + "score":0.5377697887 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.0562734776 + "score":0.1358779492 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.212197658 + "score":0.4047237198 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.0314083234 + "score":0.2528406351 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.1492631083 + "score":0.5425926629 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.0890242869 + "score":0.3052635197 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.2507686532 + "score":0.5444415164 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.319777613 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.0807676975 + "score":0.5070316671 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.0084519738 + "score":0.224754909 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.151041875 + "score":0.4942892862 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.0056901248 + "score":0.1763683901 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.100130068 + "score":0.3592673643 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.0267041676 + "score":0.0579407228 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.1555601794 + "score":0.3218620552 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.018455165 + "score":0.1045487932 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.1280213362 + "score":0.4627951581 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.0558797598 + "score":0.2457303069 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.1672295272 + "score":0.4607096598 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.0266323434 + "score":0.2208751843 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.1344659816 + "score":0.499725177 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.2165549669 + "score":0.2080392025 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.4902121608 + "score":0.5029005766 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.2493024035 + "score":0.3955111551 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.4391116426 + "score":0.5840966612 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.1532491466 + "score":0.2108703792 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.3330342559 + "score":0.4463761953 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.0762662838 + "score":0.1957956536 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.2740420072 + "score":0.5109625366 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.2875970952 + "score":0.3026696791 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.4962250868 + "score":0.5540321116 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.382073635 + "score":0.379491342 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6293993104 + "score":0.6341129937 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.0845702794 + "score":0.2373352462 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.2799479817 + "score":0.4812305289 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.1327606257 + "score":0.3592771753 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.4346855791 + "score":0.6196466978 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.2777158956 + "score":0.2189074797 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.4958635491 + "score":0.458549356 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.2970897235 + "score":0.2096038798 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.5394670378 + "score":0.48158495 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3805998732 + "score":0.3847076164 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5566308844 + "score":0.5961555843 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.3811390337 + "score":0.3031970309 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5895281984 + "score":0.4938747459 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.0685553777 + "score":0.1923337483 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.2845942287 + "score":0.4073259848 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.0371906835 + "score":0.1777393755 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.1797173863 + "score":0.4229927395 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.2079294904 + "score":0.1973765077 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.4320631023 + "score":0.478302799 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.2456928253 + "score":0.390011731 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.3989628007 + "score":0.5192332126 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.2 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.0154825384 + "score":0.0956165324 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.1339811483 + "score":0.3010660185 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0201085128 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.0278445131 + "score":0.1279466164 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.2699521486 + "score":0.3027350341 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.478882362 + "score":0.5488245098 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.2654978305 + "score":0.3543135567 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.5487755246 + "score":0.5739783335 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2622533206 + "score":0.2739321887 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.4831695415 + "score":0.523898319 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.3002613398 + "score":0.3702945368 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.4834403722 + "score":0.5833117124 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"umb", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0279025481 + "score":0.0456473272 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.0843438607 + "score":0.1799246176 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0224775292 + "score":0.0060102851 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.1057554869 + "score":0.0643020373 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.2469989894 + "score":0.2282243664 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.4943114536 + "score":0.4878680978 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2844546137 + "score":0.2701355148 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.477210689 + "score":0.4485608146 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.1727284585 + "score":0.2068814622 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.4250344787 + "score":0.4820023997 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.1817744295 + "score":0.1797290418 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.4656549066 + "score":0.5053214161 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2373174322 + "score":0.2800966186 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.4521152897 + "score":0.5447813345 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.2962197342 + "score":0.3743034645 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.544285644 + "score":0.5977965321 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0678548322 + "score":0.0698928855 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.1995976377 + "score":0.2670199291 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0410008999 + "score":0.0438604879 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.1410775666 + "score":0.2188199264 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.1807443545 + "score":0.169474795 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.4282740606 + "score":0.4233016879 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1030562145 + "score":0.1115736327 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1694260317 + "score":0.1741292068 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.0705660888 + "score":0.1777667306 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.1999709116 + "score":0.4381801577 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.005606616 + "score":0.0807523022 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.0791302868 + "score":0.4027285347 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.5 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0146140319 + "score":0.0871363585 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.1546761245 + "score":0.3153084592 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0066574272 + "score":0.0712302827 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.0720455741 + "score":0.266201042 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4.1-mini", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.1686505919 + "score":0.1722143774 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.4407452421 + "score":0.451617464 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1561257665 + "score":0.1362044502 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2413348415 + "score":0.2472412788 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2391219094 + "score":0.211203078 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.4993680631 + "score":0.4926704854 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2546682455 + "score":0.2808125016 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3131256963 + "score":0.3349070044 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":0.4 + "score":1.0 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.0409238482 + "score":0.1986426867 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.1759269251 + "score":0.4568960366 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.040396663 + "score":0.228999134 }, { - "model":"mistralai\/mistral-saba", + "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.1387970813 + "score":0.5092438205 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-mini", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.2175149129 + "score":0.1996215211 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.4527759686 + "score":0.452996678 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.1124798847 + "score":0.1894542228 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.2758121544 + "score":0.4104759123 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.0499477269 + "score":0.0772997859 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.161467557 + "score":0.2795137394 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0061206295 + "score":0.0490092548 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.1175310591 + "score":0.2793012345 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.1 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.4 + "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.0 + "score":0.0821675771 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.0682401612 + "score":0.3261780265 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0028802187 + "score":0.0581954137 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.0506386945 + "score":0.2133137227 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.1505032551 + "score":0.2038935703 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.4086092545 + "score":0.4744865332 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.1882561377 + "score":0.198427289 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.3814760125 + "score":0.4466553325 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.1878091774 + "score":0.2230716751 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.4280751788 + "score":0.4868000305 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.2100749947 + "score":0.3431634646 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.3864616183 + "score":0.5539675011 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.093266394 + "score":0.0936861 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.311215609 + "score":0.4019740671 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1039989943 + "score":0.1320458692 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.2907556954 + "score":0.3969132003 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.1020177653 + "score":0.147678651 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.3338664094 + "score":0.3870664018 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.1064323135 + "score":0.1925682475 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.3080022567 + "score":0.424235974 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.0979045908 + "score":0.1549108661 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.3174950846 + "score":0.3657540248 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.0117862293 + "score":0.1300687711 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.1847940791 + "score":0.3850114254 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.2573591397 + "score":0.2822625676 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.4309874046 + "score":0.5120051075 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.0851522303 + "score":0.1956229389 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.2284178182 + "score":0.4165258378 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.0818689903 + "score":0.1020059939 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.2636451344 + "score":0.3685311802 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.0746797577 + "score":0.1584050367 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.3147526037 + "score":0.4078207292 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"be", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.0989261118 + "score":0.129620916 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.3536619814 + "score":0.4193741335 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.1746758677 + "score":0.1911430477 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.3604842775 + "score":0.423978547 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.1908291186 + "score":0.1839199068 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.4048132215 + "score":0.4589371965 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.0320570973 + "score":0.1754477624 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.1921106676 + "score":0.3729187467 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.1554049163 + "score":0.2104863522 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.3557989532 + "score":0.4534437048 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.2229681692 + "score":0.2824626 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.3722031872 + "score":0.4822940799 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ceb", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.2130798288 + "score":0.3067568845 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.3738480621 + "score":0.522767718 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.1589337472 + "score":0.2603548365 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.4334432444 + "score":0.5472674101 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.037561576 + "score":0.061613272 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.1318206471 + "score":0.2738044534 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0495010223 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.1217595648 + "score":0.2930209689 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"cs", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.210101514 + "score":0.2480975275 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.4301134482 + "score":0.4908345188 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.362165784 + "score":0.2780131154 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.5625700474 + "score":0.5272272242 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.2836619572 + "score":0.282506513 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5189927538 + "score":0.5438865496 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.3481484827 + "score":0.4220387975 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.5703115876 + "score":0.6440212985 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.2310257801 + "score":0.2138985353 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.4431650209 + "score":0.4787400928 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.2968021074 + "score":0.3347502447 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.4607855577 + "score":0.5277403226 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.4358717425 + "score":0.4926163025 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6055153523 + "score":0.6598180449 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.6254747881 + "score":0.5627424753 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7898596498 + "score":0.7654936904 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.2211802733 + "score":0.2400384539 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.4673850088 + "score":0.4846098061 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3141247128 + "score":0.331169359 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.5664089061 + "score":0.541145091 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.1378005544 + "score":0.1934774812 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.3634410941 + "score":0.4555822394 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.1662541754 + "score":0.1515704996 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.3656154799 + "score":0.4288811212 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.2598161419 + "score":0.3049498802 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.4673557809 + "score":0.531726813 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.2352273865 + "score":0.2572361601 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.5423645035 + "score":0.5465796366 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2637885864 + "score":0.2245970544 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.50661739 + "score":0.4773636644 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.3898419239 + "score":0.456500631 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.5983170279 + "score":0.6519350009 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.030172991 + "score":0.0508716923 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.1791853335 + "score":0.2061725545 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0012644122 + "score":0.001678581 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.0541809315 + "score":0.0842472305 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.1879687767 + "score":0.1811700298 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.3575768224 + "score":0.4450724584 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.1193068232 + "score":0.1152635411 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.3258076554 + "score":0.3751269086 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.2 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.0424529379 + "score":0.1042620188 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.205545596 + "score":0.353512414 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.0315047557 + "score":0.1697643488 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.1862281652 + "score":0.4858315893 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.2931006661 + "score":0.3100386494 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5117710763 + "score":0.5747433617 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3454885653 + "score":0.3858833658 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5632832845 + "score":0.5991711103 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.1423442665 + "score":0.1533855474 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.3938289086 + "score":0.4227840042 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.0479071398 + "score":0.1131265551 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.2390532358 + "score":0.3711711494 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.1466679693 + "score":0.2089476707 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.4020226017 + "score":0.4868357652 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.2296543368 + "score":0.2953914361 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.4623290904 + "score":0.5360583303 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.1846129963 + "score":0.1907075731 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.4218789485 + "score":0.4662972265 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.2363785743 + "score":0.3386484563 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.5715726858 + "score":0.6376664219 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.042005649 + "score":0.0974819198 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.1704312564 + "score":0.3736857308 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.0060037968 + "score":0.170722725 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.1052387436 + "score":0.4039469282 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.1206947602 + "score":0.1538751748 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.2893596175 + "score":0.3921570735 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.0376635554 + "score":0.1222763549 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.2154020665 + "score":0.3651682861 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.203133363 + "score":0.2389788634 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.4421827582 + "score":0.4950691973 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.2636104621 + "score":0.3197810714 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5093890816 + "score":0.5649240218 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.1660483895 + "score":0.2240038475 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4317128373 + "score":0.4860646744 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.2448276505 + "score":0.2284065848 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4102738917 + "score":0.3753787999 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.1159703103 + "score":0.1867349669 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.3116167676 + "score":0.4122967846 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.0803083214 + "score":0.20591358 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.3001867634 + "score":0.5231507594 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.1346426707 + "score":0.1757000759 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.3961827686 + "score":0.4569938635 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.138703676 + "score":0.2608919204 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.4106570721 + "score":0.495117819 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.0501718274 + "score":0.1391396286 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.2070022512 + "score":0.3419293202 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.0014765966 + "score":0.11776525 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.0534187009 + "score":0.3296737913 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.1342281856 + "score":0.1659549387 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.3526105747 + "score":0.4341818109 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.146553268 + "score":0.1728970527 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.3291051456 + "score":0.4006604704 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.1737342381 + "score":0.1418786679 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.4232883693 + "score":0.4096096806 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.2383832092 + "score":0.2767863837 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.33001113 + "score":0.343465352 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.0707102369 + "score":0.0557337494 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.2324042355 + "score":0.2661879916 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.010496354 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.1164466909 + "score":0.2072817599 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.2572289084 + "score":0.2919442529 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.5125685183 + "score":0.558994569 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.1223672825 + "score":0.2232843577 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.3023512099 + "score":0.4785649547 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.2098940087 + "score":0.1885764001 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.443017936 + "score":0.4905954379 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.0762167285 + "score":0.1518354017 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.2552721118 + "score":0.4184448049 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.0777950532 + "score":0.2031343023 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.2573049595 + "score":0.4809424331 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.0666322315 + "score":0.1531227243 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.3626993592 + "score":0.486171029 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ml", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.2059798463 + "score":0.2649114053 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.4231363675 + "score":0.5258722646 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.2059172406 + "score":0.1638513843 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.4148312305 + "score":0.3671674679 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2216993022 + "score":0.1755855974 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.4221894818 + "score":0.464470709 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.1386303624 + "score":0.2053629902 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.3369477219 + "score":0.4466569291 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.2392636803 + "score":0.2391713081 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.4545241599 + "score":0.5284921106 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.3632780792 + "score":0.3851770392 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.6254141203 + "score":0.6778949951 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"my", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"my", + "task":"classification", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.0217828279 + "score":0.1921402736 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.144054989 + "score":0.4286234239 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.0351771663 + "score":0.226941594 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.2287244941 + "score":0.4545167964 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.1464596557 + "score":0.2044174225 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.3564056311 + "score":0.4393220695 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.1517063855 + "score":0.204358035 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.4352500122 + "score":0.4838815717 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.2230448991 + "score":0.222496921 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.4584787016 + "score":0.455228974 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.2531295878 + "score":0.2971608126 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.5302964071 + "score":0.5809601739 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0696320569 + "score":0.0616783152 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.2331307278 + "score":0.2486430016 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0252073886 + "score":0.1096684518 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.1703033014 + "score":0.4236658223 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0276939955 + "score":0.0470010342 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.1837469296 + "score":0.3053087334 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0160221908 + "score":0.0354609608 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.2144565152 + "score":0.3000471846 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.083347512 + "score":0.1342178934 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.2190646209 + "score":0.4186845018 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.0679191643 + "score":0.0959778877 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.2873941526 + "score":0.4062370429 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", - "task":"mgsm", + "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3647734864 + "score":0.3790281875 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.5784168493 + "score":0.5948460259 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.3897396366 + "score":0.4122107278 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5030239884 + "score":0.5709045042 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.1850936564 + "score":0.1956391774 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.3999928464 + "score":0.4696282098 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.2726874239 + "score":0.3089097764 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.4948927457 + "score":0.5592753275 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.2070601418 + "score":0.2493437671 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.4601106145 + "score":0.4876335319 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.412349088 + "score":0.3816451478 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6102742767 + "score":0.6189446172 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.2142629544 + "score":0.2159926241 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.4499459763 + "score":0.485645425 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.3895465667 + "score":0.4231210461 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.5708848992 + "score":0.611328256 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.1710045162 + "score":0.2106935755 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.433047449 + "score":0.4916756186 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.2961144006 + "score":0.2957139688 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5257430939 + "score":0.5505026606 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.057729338 + "score":0.0983614688 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.2177957601 + "score":0.3467631983 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.0230572611 + "score":0.2084502331 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.16449999 + "score":0.5081363979 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.0484864486 + "score":0.183239364 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.1952223401 + "score":0.4023445581 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.0005975301 + "score":0.188899922 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.1227717162 + "score":0.3987821089 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.0 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.0165865489 + "score":0.1067913788 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.1532087128 + "score":0.3496287521 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.1841725143 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.1038227782 + "score":0.3632256251 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sn", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.0457227327 + "score":0.0529920463 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.2051868353 + "score":0.282085967 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.0141586748 + "score":0.1362552545 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.1192333436 + "score":0.4625012714 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"so", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"so", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.3 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.0421597981 + "score":0.1467149035 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.1665679168 + "score":0.3956649623 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.0011074127 + "score":0.1685063005 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.1483863351 + "score":0.4680460244 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.1814266299 + "score":0.1938115187 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.4404144211 + "score":0.4876215653 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.2821064012 + "score":0.301648159 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.492442613 + "score":0.5098794037 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.1500059372 + "score":0.1547225512 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.385325025 + "score":0.3915293941 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.0190556256 + "score":0.1695373764 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.201727165 + "score":0.4747320433 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.2063172621 + "score":0.2691126673 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.4245629061 + "score":0.4857803464 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.2871245672 + "score":0.3512121942 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.5575367366 + "score":0.6095777745 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.1106429776 + "score":0.1987953868 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.3246322884 + "score":0.4232825095 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.1586725311 + "score":0.2845246017 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.4486820539 + "score":0.5836686109 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.0861902503 + "score":0.1209729479 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.2737902674 + "score":0.3863152501 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.1910130331 + "score":0.2257337081 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.4159297845 + "score":0.4945472603 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"te", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"te", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.2631133201 + "score":0.3081208582 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.504043761 + "score":0.5470122853 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.2496114121 + "score":0.2720935434 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.4707696336 + "score":0.4603538628 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.0528559098 + "score":0.1562871243 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.2067746551 + "score":0.4076252967 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.0275980154 + "score":0.1559524999 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.1880963665 + "score":0.4140020888 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.1533969949 + "score":0.1699563701 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.3718867563 + "score":0.4279668426 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.2858310833 + "score":0.3008412738 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.4323129392 + "score":0.4707696326 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", "task":"classification", "metric":"accuracy", - "score":0.3 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.0536249593 + "score":0.0460275677 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.190207113 + "score":0.2471496791 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0301094125 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.0308454815 + "score":0.0934926984 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.1776801562 + "score":0.1772339365 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.4036600408 + "score":0.4203181275 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.2335702423 + "score":0.3130066985 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.4992388897 + "score":0.5687455638 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.1788374332 + "score":0.2094411351 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.4671147568 + "score":0.4706103434 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.2348003993 + "score":0.3725710921 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.4786253942 + "score":0.5552868727 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"umb", "task":"classification", "metric":"accuracy", - "score":0.2 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0150184554 + "score":0.0334143542 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.1309010161 + "score":0.1507136538 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0010839978 + "score":0.0016606076 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.0719045729 + "score":0.0502679049 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.1854021136 + "score":0.1864708336 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.4258067424 + "score":0.4670816214 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.1388133394 + "score":0.2648238029 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.3685419874 + "score":0.4478960511 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.1877241389 + "score":0.1939396294 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.4060591516 + "score":0.4361718347 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.1165785177 + "score":0.1189904742 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.4074870036 + "score":0.4347992199 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2204611632 + "score":0.2392083536 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.4366931331 + "score":0.5109371286 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.2957932526 + "score":0.3593480951 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.5127606293 + "score":0.5929525126 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0530966299 + "score":0.0586128965 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.183305815 + "score":0.2347632724 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.0017326575 + "score":0.0021532802 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.1225688999 + "score":0.1133302543 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.1059358827 + "score":0.160147676 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.3291955196 + "score":0.391740055 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.0116334446 + "score":0.0779637528 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.0643182856 + "score":0.1486256305 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.0665909516 + "score":0.1080830211 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.2275785677 + "score":0.337342999 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.0230978994 + "score":0.0541477061 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.1160311087 + "score":0.3900223164 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.7 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0299390587 + "score":0.0799412014 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.1474455997 + "score":0.2678562615 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0 + "score":0.0384415516 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.0919549448 + "score":0.2172940187 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"yue", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.1503741808 + "score":0.1368607253 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.3985964495 + "score":0.4397284879 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1755859315 + "score":0.1909241711 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2407951689 + "score":0.2765267822 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1386896901 + "score":0.1574414981 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.413832278 + "score":0.4616304665 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2503925306 + "score":0.2468189144 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3045084897 + "score":0.3136635386 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":0.5 + "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.2 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.0365820579 + "score":0.1806603372 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.1964572986 + "score":0.4477026286 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.0044447951 + "score":0.1983500358 }, { - "model":"mistralai\/mistral-small-3.1-24b-instruct", + "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.1846817289 + "score":0.4823277126 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4.1-nano", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", - "score":0.2428195445 + "score":0.2046887048 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.5030866734 + "score":0.3905043974 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", - "score":0.2620151049 + "score":0.2440190587 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", - "score":0.4506581715 + "score":0.4467530618 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.1 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.0873196299 + "score":0.1088055906 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.2791620733 + "score":0.2952376966 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0938455414 + "score":0.04860361 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.3566262434 + "score":0.2749922921 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"am", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.2 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"am", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.1724572981 + "score":0.080495827 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.4396154177 + "score":0.287512266 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.151364721 + "score":0.0649609212 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.2888758833 + "score":0.203944936 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.2635134498 + "score":0.2086617902 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.5460096147 + "score":0.4774317011 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.2693884383 + "score":0.2730334942 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.5166212712 + "score":0.5458981435 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ar", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ar", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2499610519 + "score":0.2530052174 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5318854927 + "score":0.5158812138 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.4039353928 + "score":0.3439536667 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.6016089011 + "score":0.5691908832 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.1580325965 + "score":0.1377297001 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.4452763225 + "score":0.4304104417 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1755215463 + "score":0.1906837255 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.4247958024 + "score":0.3931621016 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.2186059158 + "score":0.1573943285 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.4527608912 + "score":0.4039837102 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.2928280563 + "score":0.2385684611 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.4822967505 + "score":0.4690487202 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"as", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"as", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.2425216718 + "score":0.1786795263 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.4831264986 + "score":0.4382834543 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.1377054899 + "score":0.142030089 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.3411255917 + "score":0.3706217658 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"awa", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"awa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.3603859441 + "score":0.2061194828 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.5684805903 + "score":0.38382712 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.1408898772 + "score":0.2351754729 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.3872495567 + "score":0.4412955741 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"az", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"az", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.169544784 + "score":0.1025818924 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.4154274624 + "score":0.355623252 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.1683183381 + "score":0.1290294373 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.4409361184 + "score":0.3807908275 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"be", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"be", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"be", "task":"translation_from", "metric":"bleu", - "score":0.1501419666 + "score":0.116157646 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"be", "task":"translation_from", "metric":"chrf", - "score":0.4658338479 + "score":0.4411553165 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"be", "task":"translation_to", "metric":"bleu", - "score":0.281865771 + "score":0.2284052455 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"be", "task":"translation_to", "metric":"chrf", - "score":0.4773167113 + "score":0.4432025312 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"bho", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"bho", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.2591609762 + "score":0.1573424376 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.5223588424 + "score":0.3813908093 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.1993415983 + "score":0.1860567167 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.4224866763 + "score":0.3798747224 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"bn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"bn", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2991941811 + "score":0.2047894665 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5494978816 + "score":0.4476643899 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3244067885 + "score":0.3413387194 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5219822848 + "score":0.5056140066 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ceb", + "task":"classification", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ceb", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ceb", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.4271806928 + "score":0.3321604587 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.6413850627 + "score":0.524735789 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.3702054313 + "score":0.399945485 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.6183232461 + "score":0.6275070378 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ckb", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", - "score":0.2020995263 + "score":0.0862560502 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", - "score":0.4532917138 + "score":0.2788047314 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", - "score":0.1831160509 + "score":0.0359802782 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", - "score":0.4629805537 + "score":0.2225612749 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"cs", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"cs", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.3165458461 + "score":0.2777777551 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.571286718 + "score":0.5317009045 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.3923540946 + "score":0.2755276023 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.6004577577 + "score":0.4907555325 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"de", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3466410252 + "score":0.2840890109 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5865767052 + "score":0.5146969249 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4978134672 + "score":0.3999539422 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6912703606 + "score":0.6267391818 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.308914586 + "score":0.2595428958 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.5426019264 + "score":0.4813680319 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.2768987754 + "score":0.3306804036 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.4945030297 + "score":0.4976939797 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"en", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"en", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.539334775 + "score":0.5232930808 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.7031277793 + "score":0.6688775695 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.6275460309 + "score":0.6469796865 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.8258272044 + "score":0.8203785308 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"es", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"es", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.3148515413 + "score":0.2793939864 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5634810123 + "score":0.5176409834 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.4056181014 + "score":0.4118937163 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6446334193 + "score":0.6353341411 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"classification", "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"fa", + "task":"mgsm", + "metric":"accuracy", "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"fa", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.3057972645 + "score":0.2052699799 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.5437056078 + "score":0.4764669046 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.2134083705 + "score":0.2131911377 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.4531825072 + "score":0.4147480093 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fil", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"fil", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.3908134626 + "score":0.3062563146 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.6127114899 + "score":0.4925975136 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.3246807458 + "score":0.32039199 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.5949140146 + "score":0.5717901387 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"fr", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.3184811644 + "score":0.2706688563 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5751428267 + "score":0.5148499232 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5082124359 + "score":0.4808374237 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.6953832529 + "score":0.6855290209 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0379690036 + "score":0.0270875349 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.2450412363 + "score":0.2100353402 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0600253056 + "score":0.052858761 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.2209496659 + "score":0.1950018354 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"gu", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"gu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.2877530662 + "score":0.2245042279 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.5281343849 + "score":0.4426786034 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.1402688672 + "score":0.1920269509 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4396874045 + "score":0.4643025206 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ha", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ha", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.2003303143 + "score":0.1154893286 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.4279958144 + "score":0.3792147754 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.2428342826 + "score":0.2191612695 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.5208238431 + "score":0.4879764503 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"hi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"hi", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"hi", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3556331675 + "score":0.3473235908 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5802678717 + "score":0.5515454754 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.38273354 + "score":0.3991894826 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6044055498 + "score":0.6121310121 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.2441042617 + "score":0.247888062 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.509051591 + "score":0.4353918541 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.1488771793 + "score":0.1626119723 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.3979048506 + "score":0.4423709529 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"hu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"hu", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.2781326859 + "score":0.2640028594 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.5567082198 + "score":0.524505973 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.3429728329 + "score":0.3929863672 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.6056687314 + "score":0.5880857849 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"id", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"id", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.3462344182 + "score":0.2522725561 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5958151457 + "score":0.5212732474 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3538211232 + "score":0.2850030055 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6387367985 + "score":0.5970450995 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ig", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.1914060382 + "score":0.1326727529 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.4768796669 + "score":0.3646478687 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.1949774059 + "score":0.1882093096 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.4716938743 + "score":0.4009607044 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ilo", + "task":"mgsm", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", - "score":0.2651265931 + "score":0.1988516559 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", - "score":0.4942111299 + "score":0.405478436 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", - "score":0.1732257355 + "score":0.0961457593 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", - "score":0.4781715485 + "score":0.4060794313 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"it", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"it", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.3009538263 + "score":0.2746808629 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.5501601568 + "score":0.5180176469 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.3493795467 + "score":0.3112912727 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.6074911227 + "score":0.5712680542 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ja", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2184952439 + "score":0.2363319461 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.5137071093 + "score":0.4826308954 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.2956671283 + "score":0.2593036542 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4548120956 + "score":0.4231415642 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"jv", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"jv", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.3245483021 + "score":0.2480055389 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.5653097878 + "score":0.4685108662 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2393376297 + "score":0.2241033812 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5786070517 + "score":0.5113817494 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"kk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"kk", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"kk", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.2332022845 + "score":0.147911394 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.5054273222 + "score":0.3985376686 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.3295494284 + "score":0.290182238 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.5830629495 + "score":0.5572310551 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"km", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"km", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.2813089854 + "score":0.2772807862 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.5573839136 + "score":0.504897576 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.1797121389 + "score":0.1404234583 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.3875039113 + "score":0.3189837953 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"kn", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"kn", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.2681346443 + "score":0.1849035655 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.516271515 + "score":0.430576325 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.3292637181 + "score":0.2444722013 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.5411335167 + "score":0.4742295195 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ko", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ko", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.2062159679 + "score":0.2013601575 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.4930481948 + "score":0.4475607863 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.199120471 + "score":0.1980395856 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.2986152055 + "score":0.2888993735 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"lua", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", - "score":0.1624653517 + "score":0.042643493 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", - "score":0.3643510485 + "score":0.2339244707 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", - "score":0.0482270518 + "score":0.0143208425 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", - "score":0.2811541542 + "score":0.2382431413 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.3712275773 + "score":0.2373436047 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.6060872554 + "score":0.4564427975 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.2415518345 + "score":0.2754056305 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.4847424345 + "score":0.5123611693 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"mai", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mai", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.3038147054 + "score":0.2027297928 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.5564897496 + "score":0.4495211176 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.1265912109 + "score":0.1639594712 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.4505477793 + "score":0.4482904829 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"mg", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mg", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.2892690248 + "score":0.2035781185 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.5283814116 + "score":0.4598803974 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.2604484731 + "score":0.1964079195 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.5570920222 + "score":0.5179064416 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ml", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ml", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.3167316864 + "score":0.2246064108 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.5615256782 + "score":0.4311975246 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.2233206779 + "score":0.2434125045 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.4945538715 + "score":0.4971145063 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"mr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"mr", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2372647899 + "score":0.1756463826 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.5201689948 + "score":0.3823527701 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2837915649 + "score":0.2340922946 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4968686069 + "score":0.4581322597 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ms", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ms", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.3817837328 + "score":0.2993296846 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.6303019898 + "score":0.5743132494 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.4073449446 + "score":0.3504238332 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.6943366118 + "score":0.6154153931 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"my", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"my", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.3249100629 + "score":0.1741885177 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.5431426981 + "score":0.449774491 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.235732523 + "score":0.2106778 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.5043766555 + "score":0.4610458467 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ne", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ne", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.3096041789 + "score":0.2483418024 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.5699841909 + "score":0.4432537254 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.2732360862 + "score":0.2305902219 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.5138403369 + "score":0.5017217229 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"nl", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.2771400437 + "score":0.2523126947 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.5234495758 + "score":0.4772912105 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.3125238973 + "score":0.3639443469 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.5924438438 + "score":0.6252850371 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ny", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ny", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ny", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ny", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.2155748492 + "score":0.0835095719 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.4825510875 + "score":0.2797853634 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.1108028659 + "score":0.0779315192 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.4669878278 + "score":0.4121236337 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"om", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.1205990098 + "score":0.0511068522 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.3774448724 + "score":0.2702934215 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0499279171 + "score":0.0724907554 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.3917438838 + "score":0.345324531 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"or", + "task":"mgsm", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"or", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.2796086811 + "score":0.1164462601 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.5313259159 + "score":0.3729006132 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.1726812539 + "score":0.1379248705 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.4298144259 + "score":0.4079943111 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"pa", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pa", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.4066850608 + "score":0.3640514137 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6424035216 + "score":0.5847142015 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4726631876 + "score":0.3988016179 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.6063294964 + "score":0.5745254523 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"pl", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.279299064 + "score":0.2319955399 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.5370393541 + "score":0.4971130964 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.3400908956 + "score":0.293769398 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.5719220223 + "score":0.535772663 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ps", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ps", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"pt", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"pt", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3091176603 + "score":0.3014354397 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5550502185 + "score":0.5315937202 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4520197651 + "score":0.4535066637 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6675992003 + "score":0.6773057972 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"qu", + "task":"mgsm", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ro", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ro", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.2929157497 + "score":0.2532461677 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.5489344393 + "score":0.5201960699 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.4680709113 + "score":0.5052082065 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.656644785 + "score":0.6686611337 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ru", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2332295676 + "score":0.2270580453 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5393238215 + "score":0.5034759488 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.3359555862 + "score":0.3258505825 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5768159572 + "score":0.5592402358 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"rw", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", - "score":0.1988009754 + "score":0.0829790682 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", - "score":0.4678679556 + "score":0.337986391 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", - "score":0.2629383296 + "score":0.2129352292 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", - "score":0.5460945267 + "score":0.5084793087 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"sd", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"sd", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sd", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.3262698326 + "score":0.15024418 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.5582120149 + "score":0.3597265355 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.3228689165 + "score":0.2369214411 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.4980199993 + "score":0.4711257499 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"si", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"si", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.2183778489 + "score":0.0980707024 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.4879608634 + "score":0.3109100287 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.2090356271 + "score":0.1934430032 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.3834363403 + "score":0.3560526886 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.1117579943 + "score":0.0582100604 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.3651910045 + "score":0.3075785834 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.1550941263 + "score":0.1011833785 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.5033416845 + "score":0.4367282377 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"so", + "task":"mgsm", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"so", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.2638307043 + "score":0.1531795055 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.4819229353 + "score":0.3616443224 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.184886568 + "score":0.2049307012 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.4611492374 + "score":0.4719724156 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"sr", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sr", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.2842627945 + "score":0.2199024767 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.5655019867 + "score":0.4907562634 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.3294849496 + "score":0.3830980295 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.5361539575 + "score":0.5736359642 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"su", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"su", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.2389196918 + "score":0.2379030124 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.4797843722 + "score":0.4403417868 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.1891587494 + "score":0.2238060743 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.4981237511 + "score":0.5243303769 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.2815285609 + "score":0.2852268785 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5465800135 + "score":0.5304479976 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.3801186069 + "score":0.3829618265 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6436092706 + "score":0.6326982198 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sw", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sw", - "task":"mgsm", + "task":"classification", "metric":"accuracy", "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"sw", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"sw", + "task":"mmlu", + "metric":"accuracy", + "score":0.4 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.3118480568 + "score":0.222064455 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.546369665 + "score":0.4652246692 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.3964352653 + "score":0.299635051 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.6509673425 + "score":0.5860066036 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ta", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ta", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ta", + "task":"mgsm", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ta", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.261907541 + "score":0.1407382127 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.5163685859 + "score":0.3831149186 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.2722219181 + "score":0.214481784 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.532399617 + "score":0.4692538776 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"te", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.393859257 + "score":0.3646122831 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.6378203173 + "score":0.5746253001 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.3327078813 + "score":0.3003064302 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5457177636 + "score":0.5444122929 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"tg", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", - "score":0.2154192376 + "score":0.1128962774 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", - "score":0.4696470371 + "score":0.3493465213 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", - "score":0.2538014928 + "score":0.1841235337 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", - "score":0.5009654989 + "score":0.417497165 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"th", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"th", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.3143461477 + "score":0.2392194968 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.5485350923 + "score":0.4936638572 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.39778814 + "score":0.3622208845 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.5566425188 + "score":0.5010514821 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ti", + "task":"mgsm", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", - "score":0.1498024182 + "score":0.0408372058 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", - "score":0.3821120653 + "score":0.2042987422 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", - "score":0.0693185679 + "score":0.0231278614 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", - "score":0.1815602807 + "score":0.0849541719 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"tr", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"tr", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"tr", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.2731125622 + "score":0.2621174982 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.5386147699 + "score":0.5019200442 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.3357059921 + "score":0.3816990204 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.6001525874 + "score":0.5999237379 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"uk", + "task":"mgsm", + "metric":"accuracy", + "score":0.9 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"uk", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2620427472 + "score":0.2245951815 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.5463036194 + "score":0.4480996711 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.3493974795 + "score":0.3852002404 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.5517845172 + "score":0.5711778517 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"umb", "task":"classification", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", - "score":0.0571388098 + "score":0.0415789397 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", - "score":0.2146586416 + "score":0.1350551103 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", - "score":0.0411925065 + "score":0.0010517421 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", - "score":0.1869298599 + "score":0.0968599255 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ur", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"ur", + "task":"mgsm", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"ur", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.205946158 + "score":0.2436356521 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.5021595381 + "score":0.4877029713 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2933139984 + "score":0.2603784132 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4825872793 + "score":0.4734427307 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"uz", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"uz", + "task":"mmlu", + "metric":"accuracy", + "score":0.5 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.2191445081 + "score":0.1411472616 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.4863403733 + "score":0.3855156193 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.2099586093 + "score":0.2021458884 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.5133143617 + "score":0.4930438511 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"vi", + "task":"mgsm", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"vi", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2583850394 + "score":0.1995232614 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5425702257 + "score":0.4582270744 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.418014366 + "score":0.2597310259 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.6308329518 + "score":0.514972808 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", - "score":0.0837728981 + "score":0.08218909 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", - "score":0.3448337519 + "score":0.2632475474 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", - "score":0.1156898441 + "score":0.0383287658 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", - "score":0.3359373421 + "score":0.1924695915 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.2145078635 + "score":0.159437398 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.4857656934 + "score":0.373213248 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1355871579 + "score":0.1199632327 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.178819439 + "score":0.1706758411 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"xh", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", - "score":0.2072124604 + "score":0.158569201 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", - "score":0.4729134507 + "score":0.3926886149 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", - "score":0.1306749519 + "score":0.0591321886 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", - "score":0.4877811483 + "score":0.3427023375 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"yo", + "task":"arc", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"yo", + "task":"mmlu", + "metric":"accuracy", + "score":0.3 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.1312045988 + "score":0.079919346 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.3655601583 + "score":0.279919938 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.1087966113 + "score":0.0720231313 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.2839573446 + "score":0.2187010976 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.2 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"yue", - "task":"classification", + "task":"arc", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"yue", + "task":"classification", + "metric":"accuracy", + "score":0.8 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"yue", + "task":"mgsm", + "metric":"accuracy", + "score":0.1 + }, + { + "model":"openai\/gpt-4o-mini", + "bcp_47":"yue", + "task":"mmlu", + "metric":"accuracy", + "score":0.6 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.1766032126 + "score":0.1290283283 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.4547311313 + "score":0.3797298683 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1904214808 + "score":0.1636055441 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2615689123 + "score":0.2449501177 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"zh", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":1.0 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"zh", + "task":"mmlu", + "metric":"accuracy", + "score":0.7 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.2192323177 + "score":0.1773725218 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.5110450766 + "score":0.458620733 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2696611562 + "score":0.2753136513 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.2988391516 + "score":0.3274827604 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", + "bcp_47":"zu", + "task":"arc", + "metric":"accuracy", + "score":1.0 + }, + { + "model":"openai\/gpt-4o-mini", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.2825650355 + "task":"mmlu", + "metric":"accuracy", + "score":0.5 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"zu", "task":"translation_from", - "metric":"chrf", - "score":0.5492218755 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zu", - "task":"translation_to", "metric":"bleu", - "score":0.2611643189 + "score":0.1866603918 }, { - "model":"openai\/gpt-4.1", + "model":"openai\/gpt-4o-mini", "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.5637612249 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2198316321 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.4708151995 + "score":0.4108538087 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"aeb", + "model":"openai\/gpt-4o-mini", + "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.2234579509 + "score":0.164847197 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"aeb", + "model":"openai\/gpt-4o-mini", + "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.4401488964 + "score":0.4524644478 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"af", - "task":"classification", + "model":"openai\/gpt-4o-mini", + "bcp_47":"zu", + "task":"truthfulqa", "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ak", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.1056657743 + "score":0.055487033 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.3249231698 + "score":0.195788708 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0618207736 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.340632844 + "score":0.1124541522 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.1521950168 + "score":0.0818660054 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.3956387285 + "score":0.2443718379 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0953768122 + "score":0.0312578478 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.2497030659 + "score":0.0892192454 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.2413546506 + "score":0.1879703279 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.5235234652 + "score":0.4082307283 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.2271910382 + "score":0.2032527408 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.4908497482 + "score":0.4252508109 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2566573338 + "score":0.1511448079 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5183862763 + "score":0.3482345089 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.3901123396 + "score":0.2660157525 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5771753105 + "score":0.4467729024 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.1301518556 + "score":0.0827758372 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.4290918442 + "score":0.2694627987 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1797566847 + "score":0.1091045232 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.4271851106 + "score":0.3272460856 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.1724971212 + "score":0.1266156847 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.4012455839 + "score":0.3088203065 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.2496844101 + "score":0.1843867072 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.4720007075 + "score":0.3542134538 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.1595843783 + "score":0.1057754271 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.4348621346 + "score":0.2879318321 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.1540181476 + "score":0.0399332596 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.351822758 + "score":0.1436097331 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.2979811644 + "score":0.2082859775 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.5341221534 + "score":0.3721700071 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.2462308641 + "score":0.1319221867 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.4432887674 + "score":0.3256562506 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"az", - "task":"classification", + "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"az", - "task":"mgsm", + "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"az", - "task":"mmlu", + "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1840798833 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4151816693 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1349305067 - }, - { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.397143235 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"be", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"be", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.1644448391 + "score":0.0834540968 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"be", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.4629038808 + "score":0.2549210714 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"be", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.2550498255 + "score":0.0664704876 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"be", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.4544854197 + "score":0.2928996174 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"bho", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.2230536146 + "score":0.1339563491 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.499651958 + "score":0.2272649787 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.2227847146 + "score":0.1668638517 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.40695057 + "score":0.3029135903 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bm", - "task":"classification", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"bn", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2819696539 + "score":0.1789816822 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.5293077213 + "score":0.3618057577 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3092254935 + "score":0.1498239832 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.4661357412 + "score":0.2860628236 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ca", - "task":"classification", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ceb", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.365645255 + "score":0.2518844765 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.5771393179 + "score":0.4578479284 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.3274189601 + "score":0.1735279991 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.5630354446 + "score":0.3732578771 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ckb", - "task":"classification", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"cs", + "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ckb", - "task":"mgsm", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"cs", + "task":"classification", "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.1379843601 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.3936670775 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.1018796158 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.3594406238 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.2953385985 + "score":0.1571790351 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.5543620654 + "score":0.3555264704 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.342919616 + "score":0.2188910218 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.5729115023 + "score":0.3725672408 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.3052927761 + "score":0.1899770324 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5504382993 + "score":0.3001055745 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4246355556 + "score":0.3043234734 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6487523813 + "score":0.4952039495 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.2853090403 + "score":0.1310803496 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.5033746216 + "score":0.2985898996 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.2976764649 + "score":0.1645861291 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.4568078793 + "score":0.2473851762 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.5178458342 + "score":0.1962899792 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6792020066 + "score":0.316041879 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.6631992536 + "score":0.5755388881 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.8257245236 + "score":0.7200533933 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.2800331904 + "score":0.1318044964 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5328441069 + "score":0.3364780931 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.3927902573 + "score":0.2564170555 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.628791549 + "score":0.4382398152 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.274202443 + "score":0.1533507904 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.5239221129 + "score":0.3596802703 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.144419277 + "score":0.1539918473 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.405746187 + "score":0.3118556242 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fil", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.3517517227 + "score":0.2008801988 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.5678423102 + "score":0.3714765993 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.2861570496 + "score":0.1940537936 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.567530869 + "score":0.4792247969 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2950999056 + "score":0.1824127171 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5638983665 + "score":0.3884840193 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.5061822417 + "score":0.3876504315 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.6903823708 + "score":0.5389108131 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0579371031 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.2407036725 + "score":0.0738447366 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.0338899407 + "score":0.0240639493 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.1685773285 + "score":0.1911668884 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.2249525185 + "score":0.1949542031 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.4726822454 + "score":0.4052894246 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.1646493878 + "score":0.0646513332 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4071725376 + "score":0.1693257519 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.2293529776 + "score":0.0392475471 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.4521332467 + "score":0.126618872 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.2554536105 + "score":0.0445527444 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.5371463729 + "score":0.1499587951 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3531906075 + "score":0.2268010617 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.6060071382 + "score":0.3688527647 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.399293733 + "score":0.2230471235 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6132292528 + "score":0.4142622149 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hne", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.2232750657 + "score":0.1156575532 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.4801269988 + "score":0.2236151918 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.1922860161 + "score":0.0698561479 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.4363534921 + "score":0.2305830236 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ht", - "task":"classification", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"hu", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"hu", + "task":"classification", + "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.2647815263 + "score":0.1691753276 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.5197043469 + "score":0.3383017469 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.3268056763 + "score":0.1871269984 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.5486126608 + "score":0.3731318657 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.2559881532 + "score":0.1585058297 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5349715693 + "score":0.3050841055 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3900018149 + "score":0.1943793424 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6494354052 + "score":0.4973073268 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.1469460203 + "score":0.0963109471 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.3801695829 + "score":0.2727236688 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.1926475709 + "score":0.0660035863 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.4187626054 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 + "score":0.158376548 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ilo", - "task":"mgsm", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"it", + "task":"arc", "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1826483605 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4686350803 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.1078652833 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.421647984 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.3047636442 + "score":0.1542082331 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.5385736571 + "score":0.3028124272 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.3458314466 + "score":0.2460472209 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5969984451 + "score":0.5145531621 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2100828863 + "score":0.1612360434 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4717405627 + "score":0.3349199354 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.2844229339 + "score":0.117016066 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4435245651 + "score":0.208990655 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.2977682173 + "score":0.1729476776 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.5413323701 + "score":0.3959364431 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2537598479 + "score":0.1785425051 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5629521778 + "score":0.4249603279 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ki", - "task":"classification", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"kk", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"kk", + "task":"classification", + "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.1646050237 + "score":0.1793896966 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.4508391233 + "score":0.4090639994 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.2752297553 + "score":0.1405729124 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.5180256955 + "score":0.3820186042 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.2132140468 + "score":0.0698832994 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.5000034068 + "score":0.2284330377 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.1292151863 + "score":0.0851826028 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.3392182289 + "score":0.1882485322 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.3 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.2382712271 + "score":0.1743539627 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.4901100456 + "score":0.4083936939 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.2584591395 + "score":0.1166173259 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.4586627531 + "score":0.2305285039 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.1783139223 + "score":0.1358227204 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.4481556757 + "score":0.3423557444 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.3002017818 + "score":0.1953069902 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.3739762238 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0772118618 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2815494636 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.001488949 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.1588971491 + "score":0.2564265013 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.3080966975 + "score":0.2314152421 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.5628489014 + "score":0.4725672887 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.2981973224 + "score":0.17835674 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.5193764902 + "score":0.3229842432 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.2802761469 + "score":0.1533461204 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.5391751615 + "score":0.3472008961 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.2042851472 + "score":0.1168236528 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.4615978684 + "score":0.2271783619 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.243797007 + "score":0.0928957375 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.4981055966 + "score":0.244366675 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.2038296766 + "score":0.0070995906 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.5453530515 + "score":0.1262437392 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.2649575888 + "score":0.1971269045 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.5072138807 + "score":0.3735961781 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.186903033 + "score":0.1323978127 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.411527522 + "score":0.1893359682 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.2358876365 + "score":0.2135948303 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.4961149155 + "score":0.3958565999 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2674122275 + "score":0.1503233282 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4442281313 + "score":0.275821329 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.3443124421 + "score":0.1307137096 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.5824988714 + "score":0.2920741112 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.3763691574 + "score":0.3539116395 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.6619682382 + "score":0.5832656935 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.2596129619 + "score":0.1255424452 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.485235691 + "score":0.329335139 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.2234699025 + "score":0.0516414641 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.4866737746 + "score":0.1567420369 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.3085593402 + "score":0.2023089106 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.5486177789 + "score":0.3806028698 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.2319189577 + "score":0.0980312706 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.4830752425 + "score":0.264649599 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.2639124065 + "score":0.1993799127 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.5166255119 + "score":0.4072508102 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.342163716 + "score":0.213081855 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.6089275595 + "score":0.4114277627 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.1 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.158564127 + "score":0.0393547699 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.4086927045 + "score":0.163604057 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.1237632416 + "score":0.0139872791 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.4428640995 + "score":0.1446751186 + }, + { + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.1266863364 + "score":0.0330655518 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.3723937215 + "score":0.1646544216 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0507341481 + "score":0.0093749808 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.3372593565 + "score":0.1074800017 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.2495546416 + "score":0.223399015 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.505250418 + "score":0.4518680174 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.1370990235 + "score":0.0545741621 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.4188964845 + "score":0.1291918248 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3898113091 + "score":0.1485025023 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.6349932626 + "score":0.3380088662 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4218934881 + "score":0.1374014148 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5699211354 + "score":0.2051503897 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.2238263799 + "score":0.1586103513 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.5157447202 + "score":0.302194795 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.3376407171 + "score":0.3897966488 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.5674744623 + "score":0.5766535228 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.2989764302 + "score":0.137323181 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5575461672 + "score":0.3095327986 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4347143661 + "score":0.2669467187 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6526848356 + "score":0.3969322178 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"qu", - "task":"mgsm", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ro", + "task":"arc", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.2365245444 + "score":0.1792721327 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.5263187531 + "score":0.4176277039 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.4295443245 + "score":0.3094152813 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.6303158648 + "score":0.4953887976 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2156464838 + "score":0.1584004696 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4961661832 + "score":0.38671906 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.3525318267 + "score":0.3118021035 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5377697887 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 + "score":0.4665016839 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"mgsm", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"sd", + "task":"arc", "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.1358779492 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.4047237198 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.2528406351 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5425926629 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.3052635197 + "score":0.1090127159 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.5444415164 + "score":0.2748895651 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.319777613 + "score":0.1361757276 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.5070316671 + "score":0.3342529311 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.224754909 + "score":0.1417180597 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.4942892862 + "score":0.3566626666 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.1763683901 + "score":0.0411134672 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.3592673643 + "score":0.1347885554 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.0579407228 + "score":0.0397889862 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.3218620552 + "score":0.1881103773 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.1045487932 + "score":0.0165581412 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.4627951581 + "score":0.1414877053 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.1 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.2457303069 + "score":0.0815647227 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.4607096598 + "score":0.2487364334 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.2208751843 + "score":0.0253075503 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.499725177 + "score":0.2274689496 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.2080392025 + "score":0.1882164689 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.5029005766 + "score":0.3943730373 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.3955111551 + "score":0.2381466467 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.5840966612 + "score":0.4346543613 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.2108703792 + "score":0.15780181 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.4463761953 + "score":0.3437437662 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.1957956536 + "score":0.1075072996 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.5109625366 + "score":0.4294170504 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.3026696791 + "score":0.1975945861 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5540321116 + "score":0.374398795 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.379491342 + "score":0.345880422 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6341129937 + "score":0.5436350308 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.2373352462 + "score":0.1913501957 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.4812305289 + "score":0.3785864037 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.3592771753 + "score":0.1219661246 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.6196466978 + "score":0.3307175909 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.2189074797 + "score":0.1483607311 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.458549356 + "score":0.3430470513 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.2096038798 + "score":0.1257952581 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.48158495 + "score":0.2720024162 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3847076164 + "score":0.378185741 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5961555843 + "score":0.557354327 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.3031970309 + "score":0.1301924452 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.4938747459 + "score":0.2509083971 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tg", - "task":"classification", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"th", + "task":"arc", "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1923337483 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4073259848 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.1777393755 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4229927395 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.1973765077 + "score":0.147695853 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.478302799 + "score":0.345415746 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.390011731 + "score":0.1876537733 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.5192332126 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":1.0 + "score":0.3005344914 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ti", - "task":"mgsm", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"tr", + "task":"arc", "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0956165324 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.3010660185 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0201085128 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.1279466164 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.3027350341 + "score":0.2078229702 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.5488245098 + "score":0.3841741235 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.3543135567 + "score":0.147060653 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.5739783335 + "score":0.3761003189 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2739321887 + "score":0.1363197738 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.523898319 + "score":0.2845774158 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.3702945368 + "score":0.3118632296 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.5833117124 + "score":0.4917979058 }, { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"umb", - "task":"classification", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"ur", + "task":"arc", "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0456473272 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1799246176 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0060102851 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.0643020373 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.2282243664 + "score":0.1623147303 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.4878680978 + "score":0.3145943461 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2701355148 + "score":0.2226311966 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4485608146 + "score":0.4414805706 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.2068814622 + "score":0.0849999362 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.4820023997 + "score":0.2406274728 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.1797290418 + "score":0.279180562 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.5053214161 + "score":0.5116111495 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.7 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2800966186 + "score":0.1911759573 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5447813345 + "score":0.3812712354 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.3743034645 + "score":0.2038650525 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.5977965321 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0698928855 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2670199291 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0438604879 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.2188199264 + "score":0.4035183237 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.169474795 + "score":0.1435587328 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.4233016879 + "score":0.3490715453 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1115736327 + "score":0.1113617435 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1741292068 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.1777667306 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.4381801577 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0807523022 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.4027285347 + "score":0.171370503 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.1 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0871363585 + "score":0.0366725514 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.3153084592 + "score":0.2007464145 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0712302827 + "score":0.0113521992 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.266201042 + "score":0.0657310926 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.1722143774 + "score":0.1121528943 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.451617464 + "score":0.3178178007 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1362044502 + "score":0.1508604775 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2472412788 + "score":0.2175191576 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.211203078 + "score":0.1039910991 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.4926704854 + "score":0.2728656752 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2808125016 + "score":0.2326991429 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3349070044 + "score":0.2529625335 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1986426867 + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"translation_from", - "metric":"chrf", - "score":0.4568960366 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zu", - "task":"translation_to", "metric":"bleu", - "score":0.228999134 + "score":0.121119706 }, { - "model":"openai\/gpt-4.1-mini", + "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.5092438205 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.1996215211 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.452996678 + "score":0.2540648952 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"aeb", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.1894542228 + "score":0.0235611585 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"aeb", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.4104759123 + "score":0.1930902578 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"af", - "task":"classification", + "model":"qwen\/qwen3-235b-a22b", + "bcp_47":"zu", + "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ak", + "task":"classification", + "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.0772997859 + "score":0.0050031284 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.2795137394 + "score":0.0924942363 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.0490092548 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.2793012345 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.0821675771 + "score":0.0116797169 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.3261780265 + "score":0.0585141671 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0581954137 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.2133137227 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.2038935703 + "score":0.0764700894 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.4744865332 + "score":0.2647322624 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.198427289 + "score":0.0882851827 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.4466553325 + "score":0.2833576827 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2230716751 + "score":0.106692739 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.4868000305 + "score":0.295973969 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.3431634646 + "score":0.0607816225 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5539675011 + "score":0.1277246917 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.0936861 + "score":0.0362905586 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.4019740671 + "score":0.1081740165 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1320458692 + "score":0.1172745059 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.3969132003 + "score":0.2687874448 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.147678651 + "score":0.0438294169 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.3870664018 + "score":0.108019816 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.1925682475 + "score":0.0673513704 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.424235974 + "score":0.2079164994 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.1549108661 + "score":0.0514848851 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.3657540248 + "score":0.1736145704 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.1300687711 + "score":0.0238993213 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.3850114254 + "score":0.1411125068 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.2822625676 + "score":0.1396565072 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.5120051075 + "score":0.2428031494 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.1956229389 + "score":0.0383901491 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.4165258378 + "score":0.1005302975 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.1020059939 + "score":0.0278489988 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.3685311802 + "score":0.1190175818 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.1584050367 + "score":0.0367390088 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.4078207292 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.8 + "score":0.0930542371 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"be", - "task":"mgsm", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"bho", + "task":"arc", "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.129620916 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4193741335 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.1911430477 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.423978547 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.1839199068 + "score":0.0736017029 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.4589371965 + "score":0.1960588462 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.1754477624 + "score":0.0585115493 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.3729187467 + "score":0.1332846728 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bm", - "task":"classification", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"bn", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2104863522 + "score":0.0429848247 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.4534437048 + "score":0.2044845821 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.2824626 + "score":0.0299885561 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.4822940799 + "score":0.0829092043 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ca", - "task":"classification", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ceb", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.6 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.3067568845 + "score":0.1804319747 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.522767718 + "score":0.3101350547 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.2603548365 + "score":0.0701534813 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.5472674101 + "score":0.1430960661 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ckb", - "task":"classification", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"cs", + "task":"arc", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ckb", - "task":"mgsm", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"cs", + "task":"classification", "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.061613272 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.2738044534 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0495010223 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.2930209689 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.5 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.2480975275 + "score":0.1415195376 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.4908345188 + "score":0.288893664 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.2780131154 + "score":0.1392279949 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.5272272242 + "score":0.2551496147 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.282506513 + "score":0.1564210937 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5438865496 + "score":0.3072042217 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.4220387975 + "score":0.2457069766 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6440212985 + "score":0.3367028296 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.2138985353 + "score":0.0578542202 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.4787400928 + "score":0.1674077812 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.3347502447 + "score":0.0663537525 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.5277403226 + "score":0.1816240149 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.4926163025 + "score":0.2474773351 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6598180449 + "score":0.3070994171 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.5627424753 + "score":0.4019192682 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.7654936904 + "score":0.4782905978 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.6 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.2400384539 + "score":0.1341566102 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.4846098061 + "score":0.2765542043 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.331169359 + "score":0.1161792768 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.541145091 + "score":0.2919898174 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.8 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.1934774812 + "score":0.0817283606 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.4555822394 + "score":0.241409878 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.1515704996 + "score":0.0476911924 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.4288811212 + "score":0.1464645136 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fil", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.3049498802 + "score":0.1279159996 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.531726813 + "score":0.2829362826 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.2572361601 + "score":0.0388514243 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.5465796366 + "score":0.1409030042 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2245970544 + "score":0.134253193 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.4773636644 + "score":0.2989125898 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.456500631 + "score":0.2077048384 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.6519350009 + "score":0.2946343811 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0508716923 + "score":0.0181579676 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.2061725545 + "score":0.0543819937 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.001678581 + "score":0.0084320884 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.0842472305 + "score":0.0454173788 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.1811700298 + "score":0.0897072533 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.4450724584 + "score":0.200957971 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.1152635411 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.3751269086 + "score":0.0377543414 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.1042620188 + "score":0.0111815534 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.353512414 + "score":0.1020769257 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.1697643488 + "score":0.021314569 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.4858315893 + "score":0.0830626431 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3100386494 + "score":0.1220519036 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5747433617 + "score":0.2639335507 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3858833658 + "score":0.0973415259 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.5991711103 + "score":0.2016493248 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hne", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.1533855474 + "score":0.0349119748 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.4227840042 + "score":0.1556602705 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.1131265551 + "score":0.0494255017 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.3711711494 + "score":0.1669634575 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ht", - "task":"classification", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"hu", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"hu", + "task":"classification", + "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.2089476707 + "score":0.1211482424 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.4868357652 + "score":0.2505103528 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.2953914361 + "score":0.164473668 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.5360583303 + "score":0.3043231306 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.2 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.1907075731 + "score":0.0833080649 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.4662972265 + "score":0.2451270085 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.3386484563 + "score":0.1631993738 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.6376664219 + "score":0.3566219474 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.0974819198 + "score":0.0073674163 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.3736857308 + "score":0.0518830276 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.170722725 + "score":0.0292915569 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.4039469282 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.8 + "score":0.0528593127 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ilo", - "task":"mgsm", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"it", + "task":"arc", "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1538751748 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.3921570735 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.1222763549 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.3651682861 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.2389788634 + "score":0.1046004559 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.4950691973 + "score":0.2507392983 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.3197810714 + "score":0.1255584711 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5649240218 + "score":0.2268219009 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2240038475 + "score":0.0941710304 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4860646744 + "score":0.2704334257 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.2284065848 + "score":0.0992512617 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.3753787999 + "score":0.1515216003 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.1867349669 + "score":0.1016751568 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.4122967846 + "score":0.2254142889 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.20591358 + "score":0.1238616028 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5231507594 + "score":0.3669735224 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ki", - "task":"classification", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"kk", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"kk", + "task":"classification", + "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.1757000759 + "score":0.0534454977 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.4569938635 + "score":0.193605134 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.2608919204 + "score":0.0265149039 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.495117819 + "score":0.1928985788 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.1391396286 + "score":0.0889265704 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.3419293202 + "score":0.2173396783 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.11776525 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.3296737913 + "score":0.0392373629 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.1659549387 + "score":0.0711012835 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.4341818109 + "score":0.1937626852 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.1728970527 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.4006604704 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.1418786679 + "score":0.0417334285 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.4096096806 + "score":0.2140485243 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.2767863837 + "score":0.0725025436 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.343465352 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0557337494 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2661879916 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.010496354 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.2072817599 + "score":0.07710948 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.2919442529 + "score":0.0771251025 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.558994569 + "score":0.2350655007 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.2232843577 + "score":0.1021996092 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.4785649547 + "score":0.2179221866 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.1885764001 + "score":0.0444196112 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.4905954379 + "score":0.2032069734 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.1518354017 + "score":0.0626942847 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.4184448049 + "score":0.1962655319 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.2031343023 + "score":0.0065978992 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.4809424331 + "score":0.0423907689 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.1531227243 + "score":0.0220679536 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.486171029 + "score":0.0589969179 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.4 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.2649114053 + "score":0.1351914125 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.5258722646 + "score":0.2585829871 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.1638513843 + "score":0.0901612807 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.3671674679 + "score":0.1634932067 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.1755855974 + "score":0.0940283278 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.464470709 + "score":0.2412937356 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2053629902 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4466569291 + "score":0.1354857092 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.2391713081 + "score":0.0831447868 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.5284921106 + "score":0.2593808275 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.3851770392 + "score":0.2740792798 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.6778949951 + "score":0.4472159864 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.2 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.1921402736 + "score":0.0693332571 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.4286234239 + "score":0.1429077445 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.226941594 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.4545167964 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.2044174225 + "score":0.0671076397 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.4393220695 + "score":0.2039597866 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.204358035 + "score":0.0167754523 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.4838815717 + "score":0.1541036377 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.222496921 + "score":0.1500475481 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.455228974 + "score":0.2732969464 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.2971608126 + "score":0.2819860484 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.5809601739 + "score":0.4877091208 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0616783152 + "score":0.0158298608 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.2486430016 + "score":0.116087277 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.1096684518 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.4236658223 + "score":0.0604762339 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0470010342 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.3053087334 + "score":0.0477354473 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0354609608 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.3000471846 + "score":0.0430498724 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.1342178934 + "score":0.0224903847 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.4186845018 + "score":0.1205338978 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.0959778877 + "score":0.0083749469 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.4062370429 + "score":0.0240423066 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3790281875 + "score":0.1058521796 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.5948460259 + "score":0.2379060391 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.4122107278 + "score":0.0340105109 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5709045042 + "score":0.1018981548 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.1956391774 + "score":0.1200562696 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.4696282098 + "score":0.2833586847 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.3089097764 + "score":0.0585253067 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.5592753275 + "score":0.226893054 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.2493437671 + "score":0.0734185487 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.4876335319 + "score":0.2234046866 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.3816451478 + "score":0.2994720627 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6189446172 + "score":0.4412841692 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"qu", - "task":"mgsm", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ro", + "task":"arc", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.2159926241 + "score":0.0737479957 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.485645425 + "score":0.2334413367 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.4231210461 + "score":0.2671240661 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.611328256 + "score":0.4582064143 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2106935755 + "score":0.0655622212 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.4916756186 + "score":0.2295301444 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.2957139688 + "score":0.1421377727 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5505026606 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 + "score":0.3364832122 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"mgsm", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"sd", + "task":"arc", "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0983614688 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.3467631983 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.2084502331 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5081363979 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.183239364 + "score":0.0729593007 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.4023445581 + "score":0.1686728011 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.188899922 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.3987821089 + "score":0.0254323861 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.2 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.1067913788 + "score":0.0173772616 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.3496287521 + "score":0.094294748 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.1841725143 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.3632256251 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.0529920463 + "score":0.0249272374 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.282085967 + "score":0.1318435849 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.1362552545 + "score":0.0114625376 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.4625012714 + "score":0.0309494652 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.1467149035 + "score":0.018362811 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.3956649623 + "score":0.0903053603 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.1685063005 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.4680460244 + "score":0.0361908088 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.1938115187 + "score":0.0697403266 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.4876215653 + "score":0.2326685525 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.301648159 + "score":0.0901351217 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.5098794037 + "score":0.1761295618 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.1547225512 + "score":0.0358186402 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.3915293941 + "score":0.1577931474 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.1695373764 + "score":0.0657495832 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.4747320433 + "score":0.2377800817 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.2691126673 + "score":0.1439757229 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.4857803464 + "score":0.3280954777 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.3512121942 + "score":0.1315707916 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6095777745 + "score":0.2532056747 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.1987953868 + "score":0.0822395206 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.4232825095 + "score":0.2293817888 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.2845246017 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.5836686109 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.1209729479 + "score":0.0711409402 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.3863152501 + "score":0.2295116951 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.2257337081 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.4945472603 + "score":0.0281826938 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3081208582 + "score":0.0907703349 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5470122853 + "score":0.2172508025 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.2720935434 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.4603538628 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tg", - "task":"classification", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"th", + "task":"arc", "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1562871243 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4076252967 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.1559524999 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4140020888 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.1699563701 + "score":0.0867988442 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.4279668426 + "score":0.2789247181 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.3008412738 + "score":0.0817852216 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.4707696326 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.6 + "score":0.1305309896 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ti", - "task":"mgsm", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"tr", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0460275677 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.2471496791 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0301094125 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0934926984 - }, - { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.1772339365 + "score":0.134883333 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.4203181275 + "score":0.2942401793 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.3130066985 + "score":0.1382385998 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.5687455638 + "score":0.250580016 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2094411351 + "score":0.0975469561 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.4706103434 + "score":0.2592386604 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.3725710921 + "score":0.11477212 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.5552868727 + "score":0.2025044003 }, { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"umb", - "task":"classification", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"ur", + "task":"arc", "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0334143542 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1507136538 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0016606076 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.0502679049 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.1864708336 + "score":0.1035672471 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.4670816214 + "score":0.2255615453 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2648238029 + "score":0.1161482705 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4478960511 + "score":0.2453264465 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.2 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.1939396294 + "score":0.069654902 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.4361718347 + "score":0.2336481279 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.1189904742 + "score":0.0972829087 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.4347992199 + "score":0.2111334793 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.2 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.2392083536 + "score":0.0562684736 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.5109371286 + "score":0.2112928198 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.3593480951 + "score":0.039901967 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.5929525126 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0586128965 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2347632724 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0021532802 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1133302543 + "score":0.2071350414 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.160147676 + "score":0.092781215 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.391740055 + "score":0.2596321396 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.0779637528 + "score":0.0743041275 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1486256305 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.1080830211 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.337342999 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0541477061 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.3900223164 + "score":0.1172524094 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.0799412014 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.2678562615 + "score":0.0559568244 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0384415516 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.2172940187 + "score":0.0188433826 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.1368607253 + "score":0.0807367939 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.4397284879 + "score":0.2250049533 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1909241711 + "score":0.1178746954 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2765267822 + "score":0.1300639553 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1574414981 + "score":0.1028766672 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.4616304665 + "score":0.2670830369 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2468189144 + "score":0.1623451886 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3136635386 + "score":0.2013735709 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1806603372 + "task":"mmlu", + "metric":"accuracy", + "score":0.0 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"translation_from", - "metric":"chrf", - "score":0.4477026286 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zu", - "task":"translation_to", "metric":"bleu", - "score":0.1983500358 + "score":0.0409146661 }, { - "model":"openai\/gpt-4.1-nano", + "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.4823277126 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2046887048 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", - "score":0.3905043974 + "score":0.1675889915 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"aeb", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.2440190587 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"aeb", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.4467530618 + "score":0.0850348967 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"af", - "task":"classification", + "model":"qwen\/qwen3-30b-a3b", + "bcp_47":"zu", + "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", + "model":"qwen\/qwen3-32b", + "bcp_47":"ak", + "task":"arc", + "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", + "model":"qwen\/qwen3-32b", + "bcp_47":"ak", + "task":"classification", + "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ak", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ak", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", - "score":0.1088055906 + "score":0.0197191428 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", - "score":0.2952376966 + "score":0.1664518353 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", - "score":0.04860361 + "score":0.022953237 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", - "score":0.2749922921 + "score":0.1745004402 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"arc", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"translation_from", "metric":"bleu", - "score":0.080495827 + "score":0.0796218409 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"translation_from", "metric":"chrf", - "score":0.287512266 + "score":0.2147354921 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"translation_to", "metric":"bleu", - "score":0.0649609212 + "score":0.010900097 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"translation_to", "metric":"chrf", - "score":0.203944936 + "score":0.0657137696 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"am", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"apc", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", - "score":0.2086617902 + "score":0.132513614 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", - "score":0.4774317011 + "score":0.3795883854 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", - "score":0.2730334942 + "score":0.137952669 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", - "score":0.5458981435 + "score":0.3264835371 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"ar", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.4 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", - "score":0.2530052174 + "score":0.1656993834 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", - "score":0.5158812138 + "score":0.3867250082 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", - "score":0.3439536667 + "score":0.2206826239 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", - "score":0.5691908832 + "score":0.4063080067 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ary", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", - "score":0.1377297001 + "score":0.1079587982 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", - "score":0.4304104417 + "score":0.3785381059 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", - "score":0.1906837255 + "score":0.1218401593 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", - "score":0.3931621016 + "score":0.3257594737 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"arz", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", - "score":0.1573943285 + "score":0.1042685601 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", - "score":0.4039837102 + "score":0.3470585369 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", - "score":0.2385684611 + "score":0.1401256855 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", - "score":0.4690487202 + "score":0.3746457154 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"as", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"translation_from", "metric":"bleu", - "score":0.1786795263 + "score":0.12200123 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"translation_from", "metric":"chrf", - "score":0.4382834543 + "score":0.3327938776 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"translation_to", "metric":"bleu", - "score":0.142030089 + "score":0.0166926581 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"translation_to", "metric":"chrf", - "score":0.3706217658 + "score":0.0857862708 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"awa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", - "score":0.2061194828 + "score":0.2393773898 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", - "score":0.38382712 + "score":0.480146856 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", - "score":0.2351754729 + "score":0.13553124 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", - "score":0.4412955741 + "score":0.3086397875 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"az", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"translation_from", "metric":"bleu", - "score":0.1025818924 + "score":0.0984273348 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"translation_from", "metric":"chrf", - "score":0.355623252 + "score":0.2705806557 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"translation_to", "metric":"bleu", - "score":0.1290294373 + "score":0.0899956365 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"translation_to", "metric":"chrf", - "score":0.3807908275 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.8 + "score":0.3292198004 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"be", - "task":"mgsm", + "model":"qwen\/qwen3-32b", + "bcp_47":"bho", + "task":"arc", "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.116157646 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4411553165 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2284052455 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4432025312 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", - "score":0.1573424376 + "score":0.1944585572 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", - "score":0.3813908093 + "score":0.4135649539 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", - "score":0.1860567167 + "score":0.0841754475 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", - "score":0.3798747224 + "score":0.2832267135 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bm", - "task":"classification", + "model":"qwen\/qwen3-32b", + "bcp_47":"bn", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", - "score":0.2047894665 + "score":0.1905773039 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", - "score":0.4476643899 + "score":0.3974640862 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", - "score":0.3413387194 + "score":0.1738721227 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", - "score":0.5056140066 + "score":0.3341823126 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ca", - "task":"classification", + "model":"qwen\/qwen3-32b", + "bcp_47":"ceb", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", - "score":0.3321604587 + "score":0.2913417198 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", - "score":0.524735789 + "score":0.4715886747 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", - "score":0.399945485 + "score":0.2042487615 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", - "score":0.6275070378 + "score":0.3926894761 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ckb", - "task":"classification", + "model":"qwen\/qwen3-32b", + "bcp_47":"cs", + "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ckb", - "task":"mgsm", + "model":"qwen\/qwen3-32b", + "bcp_47":"cs", + "task":"classification", "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.0862560502 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.2788047314 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0359802782 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.2225612749 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"cs", - "task":"classification", + "task":"mgsm", "metric":"accuracy", "score":0.8 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"cs", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", - "score":0.2777777551 + "score":0.1724601448 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", - "score":0.5317009045 + "score":0.3857736694 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", - "score":0.2755276023 + "score":0.2045983077 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", - "score":0.4907555325 + "score":0.4057824257 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"de", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"translation_from", "metric":"bleu", - "score":0.2840890109 + "score":0.2174481184 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"translation_from", "metric":"chrf", - "score":0.5146969249 + "score":0.4124810034 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"translation_to", "metric":"bleu", - "score":0.3999539422 + "score":0.3022742815 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"translation_to", "metric":"chrf", - "score":0.6267391818 + "score":0.4968168009 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"el", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.4 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"el", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"translation_from", "metric":"bleu", - "score":0.2595428958 + "score":0.1714951139 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"translation_from", "metric":"chrf", - "score":0.4813680319 + "score":0.3572714199 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"translation_to", "metric":"bleu", - "score":0.3306804036 + "score":0.1874209861 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"translation_to", "metric":"chrf", - "score":0.4976939797 + "score":0.3148917242 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.8 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"translation_from", "metric":"bleu", - "score":0.5232930808 + "score":0.446850518 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"translation_from", "metric":"chrf", - "score":0.6688775695 + "score":0.5288962517 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"translation_to", "metric":"bleu", - "score":0.6469796865 + "score":0.6607457062 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"translation_to", "metric":"chrf", - "score":0.8203785308 + "score":0.828511917 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"en", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.8 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"translation_from", "metric":"bleu", - "score":0.2793939864 + "score":0.2056231855 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"translation_from", "metric":"chrf", - "score":0.5176409834 + "score":0.4306632094 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"translation_to", "metric":"bleu", - "score":0.4118937163 + "score":0.279042145 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"translation_to", "metric":"chrf", - "score":0.6353341411 + "score":0.5148983586 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"fa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", - "score":0.2052699799 + "score":0.0657228626 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", - "score":0.4764669046 + "score":0.3139715852 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", - "score":0.2131911377 + "score":0.1461056975 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", - "score":0.4147480093 + "score":0.3475439511 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fil", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", - "score":0.3062563146 + "score":0.2438433878 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", - "score":0.4925975136 + "score":0.4512223379 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", - "score":0.32039199 + "score":0.2208119792 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", - "score":0.5717901387 + "score":0.4378335772 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"fr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", - "score":0.2706688563 + "score":0.1721857235 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", - "score":0.5148499232 + "score":0.477225501 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", - "score":0.4808374237 + "score":0.4037533819 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", - "score":0.6855290209 + "score":0.6067640163 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", - "score":0.0270875349 + "score":0.0224194954 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", - "score":0.2100353402 + "score":0.105046056 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", - "score":0.052858761 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", - "score":0.1950018354 + "score":0.082276319 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"gu", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.6 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", - "score":0.2245042279 + "score":0.1698272846 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", - "score":0.4426786034 + "score":0.3349276506 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", - "score":0.1920269509 + "score":0.0740380781 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", - "score":0.4643025206 + "score":0.2296558189 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", - "score":0.1154893286 + "score":0.0341558033 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", - "score":0.3792147754 + "score":0.1515090956 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", - "score":0.2191612695 + "score":0.0178215481 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", - "score":0.4879764503 + "score":0.206138289 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"ha", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", - "score":0.3473235908 + "score":0.220218347 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", - "score":0.5515454754 + "score":0.4387911559 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", - "score":0.3991894826 + "score":0.2024184343 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", - "score":0.6121310121 + "score":0.4252839653 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hne", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", - "score":0.247888062 + "score":0.1375555656 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", - "score":0.4353918541 + "score":0.3016838615 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", - "score":0.1626119723 + "score":0.1141821718 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", - "score":0.4423709529 + "score":0.3531692508 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ht", - "task":"classification", + "model":"qwen\/qwen3-32b", + "bcp_47":"hu", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", + "model":"qwen\/qwen3-32b", + "bcp_47":"hu", + "task":"classification", + "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hu", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hu", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", - "score":0.2640028594 + "score":0.201485104 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", - "score":0.524505973 + "score":0.4060774974 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", - "score":0.3929863672 + "score":0.1845697152 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", - "score":0.5880857849 + "score":0.3651920542 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"id", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"translation_from", "metric":"bleu", - "score":0.2522725561 + "score":0.2119242961 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"translation_from", "metric":"chrf", - "score":0.5212732474 + "score":0.4027870816 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"translation_to", "metric":"bleu", - "score":0.2850030055 + "score":0.2845893115 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"translation_to", "metric":"chrf", - "score":0.5970450995 + "score":0.5618854988 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"ig", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", - "score":0.1326727529 + "score":0.0631617801 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", - "score":0.3646478687 + "score":0.2207616259 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", - "score":0.1882093096 + "score":0.0508365473 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", - "score":0.4009607044 + "score":0.1697158135 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ilo", - "task":"mgsm", + "model":"qwen\/qwen3-32b", + "bcp_47":"it", + "task":"arc", "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1988516559 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.405478436 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.0961457593 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.4060794313 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"translation_from", "metric":"bleu", - "score":0.2746808629 + "score":0.1840631549 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"translation_from", "metric":"chrf", - "score":0.5180176469 + "score":0.4301865089 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"translation_to", "metric":"bleu", - "score":0.3112912727 + "score":0.2482222138 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"translation_to", "metric":"chrf", - "score":0.5712680542 + "score":0.5228204728 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"ja", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", - "score":0.2363319461 + "score":0.1127930596 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", - "score":0.4826308954 + "score":0.2787849105 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", - "score":0.2593036542 + "score":0.1840671906 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", - "score":0.4231415642 + "score":0.3135227124 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"jv", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", - "score":0.2480055389 + "score":0.1834764341 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", - "score":0.4685108662 + "score":0.3297107768 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", - "score":0.2241033812 + "score":0.1569316995 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", - "score":0.5113817494 + "score":0.467165329 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ki", - "task":"classification", + "model":"qwen\/qwen3-32b", + "bcp_47":"kk", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", + "model":"qwen\/qwen3-32b", + "bcp_47":"kk", + "task":"classification", + "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"kk", - "task":"classification", + "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"kk", - "task":"mgsm", + "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", - "score":0.147911394 + "score":0.1096372066 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", - "score":0.3985376686 + "score":0.2865411962 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", - "score":0.290182238 + "score":0.1388075288 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", - "score":0.5572310551 + "score":0.3285307881 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"km", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.3 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"translation_from", "metric":"bleu", - "score":0.2772807862 + "score":0.170745871 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"translation_from", "metric":"chrf", - "score":0.504897576 + "score":0.3490488807 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"translation_to", "metric":"bleu", - "score":0.1404234583 + "score":0.0792740607 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"translation_to", "metric":"chrf", - "score":0.3189837953 + "score":0.2285805687 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"kn", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", - "score":0.1849035655 + "score":0.1679784179 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", - "score":0.430576325 + "score":0.3620246212 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", - "score":0.2444722013 + "score":0.1242897501 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", - "score":0.4742295195 + "score":0.3007681742 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"ko", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", - "score":0.2013601575 + "score":0.1276543618 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", - "score":0.4475607863 + "score":0.3677051571 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", - "score":0.1980395856 + "score":0.1980497946 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", - "score":0.2888993735 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.042643493 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2339244707 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0143208425 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.2382431413 + "score":0.2107568779 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mag", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", - "score":0.2373436047 + "score":0.2450449733 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", - "score":0.4564427975 + "score":0.457594122 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", - "score":0.2754056305 + "score":0.198485011 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", - "score":0.5123611693 + "score":0.3848787397 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"mai", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.5 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", - "score":0.2027297928 + "score":0.1927814544 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", - "score":0.4495211176 + "score":0.3935457095 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", - "score":0.1639594712 + "score":0.0565208468 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", - "score":0.4482904829 + "score":0.3131063701 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"mg", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", - "score":0.2035781185 + "score":0.0354662811 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", - "score":0.4598803974 + "score":0.1686638218 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", - "score":0.1964079195 + "score":0.0314143451 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", - "score":0.5179064416 + "score":0.2728599885 + }, + { + "model":"qwen\/qwen3-32b", + "bcp_47":"ml", + "task":"arc", + "metric":"accuracy", + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", - "score":0.2246064108 + "score":0.2371717296 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", - "score":0.4311975246 + "score":0.4148173757 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", - "score":0.2434125045 + "score":0.1193636287 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", - "score":0.4971145063 + "score":0.2090448587 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"mr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.6 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", - "score":0.1756463826 + "score":0.1181276928 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", - "score":0.3823527701 + "score":0.3044523516 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", - "score":0.2340922946 + "score":0.0922160441 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", - "score":0.4581322597 + "score":0.2284498534 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"ms", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", - "score":0.2993296846 + "score":0.1968422851 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", - "score":0.5743132494 + "score":0.4053316305 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", - "score":0.3504238332 + "score":0.3766906478 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", - "score":0.6154153931 + "score":0.6371848492 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"my", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.3 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"translation_from", "metric":"bleu", - "score":0.1741885177 + "score":0.1225410694 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"translation_from", "metric":"chrf", - "score":0.449774491 + "score":0.3037176244 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"translation_to", "metric":"bleu", - "score":0.2106778 + "score":0.1060809306 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"translation_to", "metric":"chrf", - "score":0.4610458467 + "score":0.2215712232 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"ne", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.3 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", - "score":0.2483418024 + "score":0.1375199333 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", - "score":0.4432537254 + "score":0.3471041134 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", - "score":0.2305902219 + "score":0.1115971998 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", - "score":0.5017217229 + "score":0.2915076183 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"nl", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.5 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", - "score":0.2523126947 + "score":0.2385777935 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", - "score":0.4772912105 + "score":0.46025989 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", - "score":0.3639443469 + "score":0.2726117583 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", - "score":0.6252850371 + "score":0.5059991136 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"ny", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", - "score":0.0835095719 + "score":0.0351245421 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", - "score":0.2797853634 + "score":0.1823407405 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", - "score":0.0779315192 + "score":0.0228348515 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", - "score":0.4121236337 + "score":0.2201854752 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"om", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"translation_from", "metric":"bleu", - "score":0.0511068522 + "score":0.008627568 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"translation_from", "metric":"chrf", - "score":0.2702934215 + "score":0.190294404 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"translation_to", "metric":"bleu", - "score":0.0724907554 + "score":0.0084651752 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"translation_to", "metric":"chrf", - "score":0.345324531 + "score":0.1971638266 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"or", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", - "score":0.5 + "score":0.6 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"translation_from", "metric":"bleu", - "score":0.1164462601 + "score":0.1927446862 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"translation_from", "metric":"chrf", - "score":0.3729006132 + "score":0.3892188652 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"translation_to", "metric":"bleu", - "score":0.1379248705 + "score":0.0747658241 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"translation_to", "metric":"chrf", - "score":0.4079943111 + "score":0.2195369005 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"pa", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", - "score":0.3640514137 + "score":0.3258973448 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", - "score":0.5847142015 + "score":0.4626835685 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", - "score":0.3988016179 + "score":0.2107798391 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", - "score":0.5745254523 + "score":0.3008568297 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"pl", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", - "score":0.2319955399 + "score":0.1854293513 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", - "score":0.4971130964 + "score":0.4307605073 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", - "score":0.293769398 + "score":0.2420005385 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", - "score":0.535772663 + "score":0.4288325052 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"ps", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.5 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"pt", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", - "score":0.3014354397 + "score":0.2177730164 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", - "score":0.5315937202 + "score":0.433438268 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", - "score":0.4535066637 + "score":0.3402518575 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", - "score":0.6773057972 + "score":0.4997252818 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"qu", - "task":"mgsm", + "model":"qwen\/qwen3-32b", + "bcp_47":"ro", + "task":"arc", "metric":"accuracy", - "score":0.2 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", - "score":0.2532461677 + "score":0.1593298949 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", - "score":0.5201960699 + "score":0.4017223467 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", - "score":0.5052082065 + "score":0.2726504789 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", - "score":0.6686611337 + "score":0.4406178765 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"ru", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", - "score":0.2270580453 + "score":0.1869622361 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", - "score":0.5034759488 + "score":0.4301337345 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", - "score":0.3258505825 + "score":0.2546688585 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", - "score":0.5592402358 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.8 + "score":0.3959108821 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"mgsm", + "model":"qwen\/qwen3-32b", + "bcp_47":"sd", + "task":"arc", "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0829790682 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.337986391 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.2129352292 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5084793087 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", - "score":0.15024418 + "score":0.1610657464 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", - "score":0.3597265355 + "score":0.3043802738 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", - "score":0.2369214411 + "score":0.0315314884 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", - "score":0.4711257499 + "score":0.2120098132 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"si", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"translation_from", "metric":"bleu", - "score":0.0980707024 + "score":0.0130905001 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"translation_from", "metric":"chrf", - "score":0.3109100287 + "score":0.1973781543 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"translation_to", "metric":"bleu", - "score":0.1934430032 + "score":0.0134555536 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"translation_to", "metric":"chrf", - "score":0.3560526886 + "score":0.1176319627 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"sn", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"sn", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", - "score":0.0582100604 + "score":0.0330243636 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", - "score":0.3075785834 + "score":0.1963889628 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", - "score":0.1011833785 + "score":0.0428214603 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", - "score":0.4367282377 + "score":0.2352543457 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"so", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", - "score":0.4 + "score":0.3 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"translation_from", "metric":"bleu", - "score":0.1531795055 + "score":0.0329294407 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"translation_from", "metric":"chrf", - "score":0.3616443224 + "score":0.1742241015 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"translation_to", "metric":"bleu", - "score":0.2049307012 + "score":0.0243956065 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"translation_to", "metric":"chrf", - "score":0.4719724156 + "score":0.1607978429 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"sr", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.4 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", - "score":0.2199024767 + "score":0.1604574347 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", - "score":0.4907562634 + "score":0.4084047683 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", - "score":0.3830980295 + "score":0.1664454505 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", - "score":0.5736359642 + "score":0.34203002 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"su", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.4 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"translation_from", "metric":"bleu", - "score":0.2379030124 + "score":0.1257406217 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"translation_from", "metric":"chrf", - "score":0.4403417868 + "score":0.3378715267 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"translation_to", "metric":"bleu", - "score":0.2238060743 + "score":0.0667296519 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"translation_to", "metric":"chrf", - "score":0.5243303769 + "score":0.3980540266 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"sv", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"sv", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", - "score":0.2852268785 + "score":0.1672894127 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", - "score":0.5304479976 + "score":0.3684788102 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", - "score":0.3829618265 + "score":0.3614873089 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", - "score":0.6326982198 + "score":0.5392220773 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"arc", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"classification", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", - "score":0.7 + "score":0.8 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", - "score":0.4 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", - "score":0.222064455 + "score":0.1200999603 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", - "score":0.4652246692 + "score":0.2908221442 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", - "score":0.299635051 + "score":0.023689627 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", - "score":0.5860066036 + "score":0.2453164021 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"sw", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", + "bcp_47":"ta", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", - "score":0.8 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", - "score":0.1407382127 + "score":0.1882602024 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", - "score":0.3831149186 + "score":0.4059862729 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", - "score":0.214481784 + "score":0.0841932466 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", - "score":0.4692538776 + "score":0.260536888 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"te", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"translation_from", "metric":"bleu", - "score":0.3646122831 + "score":0.3422950731 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"translation_from", "metric":"chrf", - "score":0.5746253001 + "score":0.4841100904 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"translation_to", "metric":"bleu", - "score":0.3003064302 + "score":0.1765162745 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"translation_to", "metric":"chrf", - "score":0.5444122929 + "score":0.3133803312 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tg", - "task":"classification", + "model":"qwen\/qwen3-32b", + "bcp_47":"th", + "task":"arc", "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1128962774 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.3493465213 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.1841235337 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.417497165 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"translation_from", "metric":"bleu", - "score":0.2392194968 + "score":0.196772439 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"translation_from", "metric":"chrf", - "score":0.4936638572 + "score":0.4191567084 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"translation_to", "metric":"bleu", - "score":0.3622208845 + "score":0.2773920621 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"translation_to", "metric":"chrf", - "score":0.5010514821 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.9 + "score":0.4482220675 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ti", - "task":"mgsm", + "model":"qwen\/qwen3-32b", + "bcp_47":"tr", + "task":"arc", "metric":"accuracy", "score":0.0 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0408372058 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.2042987422 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0231278614 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0849541719 - }, - { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", - "score":0.2621174982 + "score":0.2102634926 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", - "score":0.5019200442 + "score":0.4159372483 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", - "score":0.3816990204 + "score":0.2364464274 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", - "score":0.5999237379 + "score":0.4789525721 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"uk", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", - "score":0.9 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", - "score":0.2245951815 + "score":0.1579324347 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", - "score":0.4480996711 + "score":0.3647294785 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", - "score":0.3852002404 + "score":0.3006688281 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", - "score":0.5711778517 + "score":0.468364849 }, { - "model":"openai\/gpt-4o-mini", - "bcp_47":"umb", - "task":"classification", + "model":"qwen\/qwen3-32b", + "bcp_47":"ur", + "task":"arc", "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0415789397 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1350551103 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0010517421 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.0968599255 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", - "score":0.2436356521 + "score":0.1705686173 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", - "score":0.4877029713 + "score":0.3937623183 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", - "score":0.2603784132 + "score":0.1711380057 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", - "score":0.4734427307 + "score":0.3502063066 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"uz", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", - "score":0.5 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", - "score":0.1411472616 + "score":0.1315627205 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", - "score":0.3855156193 + "score":0.3801109933 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", - "score":0.2021458884 + "score":0.1184994967 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", - "score":0.4930438511 + "score":0.3552530055 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"vi", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", - "score":0.6 + "score":0.7 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", - "score":0.1995232614 + "score":0.2826147232 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", - "score":0.4582270744 + "score":0.5352078445 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", - "score":0.2597310259 + "score":0.2891587891 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", - "score":0.514972808 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.08218909 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2632475474 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0383287658 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1924695915 + "score":0.5030568081 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", - "score":0.159437398 + "score":0.1923006081 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", - "score":0.373213248 + "score":0.3850610484 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", - "score":0.1199632327 + "score":0.0704239199 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", - "score":0.1706758411 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.158569201 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.3926886149 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0591321886 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.3427023375 + "score":0.1309128692 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"arc", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.1 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", - "score":0.079919346 + "score":0.0078376559 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", - "score":0.279919938 + "score":0.1634867622 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", - "score":0.0720231313 + "score":0.028486223 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", - "score":0.2187010976 + "score":0.0875657048 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"yo", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", + "bcp_47":"yue", + "task":"arc", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"classification", "metric":"accuracy", - "score":0.8 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", - "score":0.1 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", - "score":0.6 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", - "score":0.1290283283 + "score":0.1257691602 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", - "score":0.3797298683 + "score":0.3303444225 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", - "score":0.1636055441 + "score":0.159764099 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", - "score":0.2449501177 + "score":0.2311709663 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", - "score":1.0 + "score":0.4 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", - "score":0.7 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", - "score":0.1773725218 + "score":0.170228681 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", - "score":0.458620733 + "score":0.4530475535 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", - "score":0.2753136513 + "score":0.2387260041 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", - "score":0.3274827604 + "score":0.288127087 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"arc", "metric":"accuracy", - "score":1.0 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"classification", "metric":"accuracy", - "score":0.9 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", - "score":0.3 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", + "bcp_47":"zu", + "task":"mmlu", + "metric":"accuracy", + "score":0.0 + }, + { + "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", - "score":0.1866603918 + "score":0.04772924 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", - "score":0.4108538087 + "score":0.237905051 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", - "score":0.164847197 + "score":0.0 }, { - "model":"openai\/gpt-4o-mini", + "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", - "score":0.4524644478 + "score":0.1114053338 + }, + { + "model":"qwen\/qwen3-32b", + "bcp_47":"zu", + "task":"truthfulqa", + "metric":"accuracy", + "score":0.0 } ] \ No newline at end of file